1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
5 /*--- accessibility (A) and validity (V) status of each byte. ---*/
7 /*--------------------------------------------------------------------*/
/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2017 Julian Seward

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
32 #include "pub_tool_basics.h"
33 #include "pub_tool_aspacemgr.h"
34 #include "pub_tool_gdbserver.h"
35 #include "pub_tool_poolalloc.h"
36 #include "pub_tool_hashtable.h" // For mc_include.h
37 #include "pub_tool_libcbase.h"
38 #include "pub_tool_libcassert.h"
39 #include "pub_tool_libcprint.h"
40 #include "pub_tool_machine.h"
41 #include "pub_tool_mallocfree.h"
42 #include "pub_tool_options.h"
43 #include "pub_tool_oset.h"
44 #include "pub_tool_rangemap.h"
45 #include "pub_tool_replacemalloc.h"
46 #include "pub_tool_tooliface.h"
47 #include "pub_tool_threadstate.h"
48 #include "pub_tool_xarray.h"
49 #include "pub_tool_xtree.h"
50 #include "pub_tool_xtmemory.h"
52 #include "mc_include.h"
53 #include "memcheck.h" /* for client requests */
55 /* Set to 1 to do a little more sanity checking */
56 #define VG_DEBUG_MEMORY 0
58 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
/* Forward declarations: both helpers are file-local and are used
   before their definitions. */
static void ocache_sarp_Set_Origins   ( Addr, UWord, UInt );
static void ocache_sarp_Clear_Origins ( Addr, UWord );
64 /*------------------------------------------------------------*/
65 /*--- Fast-case knobs ---*/
66 /*------------------------------------------------------------*/
68 // Comment these out to disable the fast cases (don't just set them to zero).
70 /* PERF_FAST_LOADV is in mc_include.h */
71 #define PERF_FAST_STOREV 1
73 #define PERF_FAST_SARP 1
75 #define PERF_FAST_STACK 1
76 #define PERF_FAST_STACK2 1
/* Change this to 1 to enable assertions on origin tracking cache fast
   paths */
80 #define OC_ENABLE_ASSERTIONS 0
82 /* Change this to 1 for experimental, higher precision origin tracking
83 8- and 16-bit store handling. */
84 #define OC_PRECISION_STORE 1
87 /*------------------------------------------------------------*/
88 /*--- Comments on the origin tracking implementation ---*/
89 /*------------------------------------------------------------*/
91 /* See detailed comment entitled
92 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
93 which is contained further on in this file. */
96 /*------------------------------------------------------------*/
97 /*--- V bits and A bits ---*/
98 /*------------------------------------------------------------*/
100 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
101 thinks the corresponding value bit is defined. And every memory byte
102 has an A bit, which tracks whether Memcheck thinks the program can access
103 it safely (ie. it's mapped, and has at least one of the RWX permission bits
104 set). So every N-bit register is shadowed with N V bits, and every memory
105 byte is shadowed with 8 V bits and one A bit.
   In the implementation, we use two forms of compression (compressed V bits
   and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
   that this conceptual scheme implies.

   Memcheck also tracks extra information about each heap block that is
   allocated, for detecting memory leaks and other purposes.
*/
115 /*------------------------------------------------------------*/
116 /*--- Basic A/V bitmap representation. ---*/
117 /*------------------------------------------------------------*/
119 /* All reads and writes are checked against a memory map (a.k.a. shadow
120 memory), which records the state of all memory in the process.
122 On 32-bit machines the memory map is organised as follows.
123 The top 16 bits of an address are used to index into a top-level
124 map table, containing 65536 entries. Each entry is a pointer to a
   second-level map, which records the accessibility and validity
126 permissions for the 65536 bytes indexed by the lower 16 bits of the
127 address. Each byte is represented by two bits (details are below). So
   each second-level map contains 16384 bytes.  This two-level arrangement
   conveniently divides the 4G address space into 64k lumps, each size 64k
   bytes.
132 All entries in the primary (top-level) map must point to a valid
133 secondary (second-level) map. Since many of the 64kB chunks will
134 have the same status for every bit -- ie. noaccess (for unused
135 address space) or entirely addressable and defined (for code segments) --
136 there are three distinguished secondary maps, which indicate 'noaccess',
137 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
138 map entry points to the relevant distinguished map. In practice,
139 typically more than half of the addressable memory is represented with
140 the 'undefined' or 'defined' distinguished secondary map, so it gives a
141 good saving. It also lets us set the V+A bits of large address regions
142 quickly in set_address_range_perms().
144 On 64-bit machines it's more complicated. If we followed the same basic
145 scheme we'd have a four-level table which would require too many memory
   accesses.  So instead the top-level map table has 2^21 entries (indexed
   using bits 16..36 of the address); this covers the bottom 128GB.  Any
   accesses above 128GB are handled with a slow, sparse auxiliary table.
   Valgrind's address space manager tries very hard to keep things below
   this 128GB barrier so that performance doesn't suffer too much.
152 Note that this file has a lot of different functions for reading and
153 writing shadow memory. Only a couple are strictly necessary (eg.
154 get_vabits2 and set_vabits2), most are just specialised for specific
155 common cases to improve performance.
157 Aside: the V+A bits are less precise than they could be -- we have no way
158 of marking memory as read-only. It would be great if we could add an
159 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
   which requires 2.3 bits to hold, and there's no way to do that elegantly
   -- we'd have to double up to 4 bits of metadata per byte, which doesn't
   seem worth it.
*/
165 /* --------------- Basic configuration --------------- */
/* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */

/* The two settings below are alternatives, so they must be selected by
   word size; defining both unconditionally redefines the macro. */
#if VG_WORDSIZE == 4

/* cover the entire address space */
#  define N_PRIMARY_BITS  16

#else

/* Just handle the first 128G fast and the rest via auxiliary
   primaries.  If you change this, Memcheck will assert at startup.
   See the definition of UNALIGNED_OR_HIGH for extensive comments. */
#  define N_PRIMARY_BITS  21

#endif

/* Do not change this. */
#define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)

/* Do not change this. */
#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
191 /* --------------- Secondary maps --------------- */
193 // Each byte of memory conceptually has an A bit, which indicates its
194 // addressability, and 8 V bits, which indicates its definedness.
196 // But because very few bytes are partially defined, we can use a nice
197 // compression scheme to reduce the size of shadow memory. Each byte of
198 // memory has 2 bits which indicates its state (ie. V+A bits):
200 // 00: noaccess (unaddressable but treated as fully defined)
201 // 01: undefined (addressable and fully undefined)
202 // 10: defined (addressable and fully defined)
203 // 11: partdefined (addressable and partially defined)
// In the "partdefined" case, we use a secondary table to store the V bits.
// Each entry in the secondary-V-bits table maps a byte address to its 8 V
// bits.
209 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
210 // four bytes (32 bits) of memory are in each chunk. Hence the name
211 // "vabits8". This lets us get the V+A bits for four bytes at a time
212 // easily (without having to do any shifting and/or masking), and that is a
213 // very common operation. (Note that although each vabits8 chunk
214 // is 8 bits in size, it represents 32 bits of memory.)
216 // The representation is "inverse" little-endian... each 4 bytes of
217 // memory is represented by a 1 byte value, where:
219 // - the status of byte (a+0) is held in bits [1..0]
220 // - the status of byte (a+1) is held in bits [3..2]
221 // - the status of byte (a+2) is held in bits [5..4]
222 // - the status of byte (a+3) is held in bits [7..6]
224 // It's "inverse" because endianness normally describes a mapping from
225 // value bits to memory addresses; in this case the mapping is inverted.
226 // Ie. instead of particular value bits being held in certain addresses, in
227 // this case certain addresses are represented by particular value bits.
228 // See insert_vabits2_into_vabits8() for an example.
230 // But note that we don't compress the V bits stored in registers; they
// need to be explicit to make the shadow operations possible.  Therefore
232 // when moving values between registers and memory we need to convert
233 // between the expanded in-register format and the compressed in-memory
// format.  This isn't so difficult, it just requires careful attention in a
// few places.
237 // These represent eight bits of memory.
238 #define VA_BITS2_NOACCESS 0x0 // 00b
239 #define VA_BITS2_UNDEFINED 0x1 // 01b
240 #define VA_BITS2_DEFINED 0x2 // 10b
241 #define VA_BITS2_PARTDEFINED 0x3 // 11b
243 // These represent 16 bits of memory.
244 #define VA_BITS4_NOACCESS 0x0 // 00_00b
245 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
246 #define VA_BITS4_DEFINED 0xa // 10_10b
248 // These represent 32 bits of memory.
249 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
250 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
251 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
253 // These represent 64 bits of memory.
254 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
255 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
256 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
258 // These represent 128 bits of memory.
259 #define VA_BITS32_UNDEFINED 0x55555555 // 01_01_01_01b x 4
262 #define SM_CHUNKS 16384 // Each SM covers 64k of memory.
263 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
264 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3)
266 // Paranoia: it's critical for performance that the requested inlining
267 // occurs. So try extra hard.
268 #define INLINE inline __attribute__((always_inline))
270 static INLINE Addr
start_of_this_sm ( Addr a
) {
271 return (a
& (~SM_MASK
));
273 static INLINE Bool
is_start_of_sm ( Addr a
) {
274 return (start_of_this_sm(a
) == a
);
277 STATIC_ASSERT(SM_CHUNKS
% 2 == 0);
281 UChar vabits8
[SM_CHUNKS
];
282 UShort vabits16
[SM_CHUNKS
/2];
286 // 3 distinguished secondary maps, one for no-access, one for
287 // accessible but undefined, and one for accessible and defined.
288 // Distinguished secondaries may never be modified.
289 #define SM_DIST_NOACCESS 0
290 #define SM_DIST_UNDEFINED 1
291 #define SM_DIST_DEFINED 2
293 static SecMap sm_distinguished
[3];
295 static INLINE Bool
is_distinguished_sm ( SecMap
* sm
) {
296 return sm
>= &sm_distinguished
[0] && sm
<= &sm_distinguished
[2];
299 // Forward declaration
300 static void update_SM_counts(SecMap
* oldSM
, SecMap
* newSM
);
302 /* dist_sm points to one of our three distinguished secondaries. Make
303 a copy of it so that we can write to it.
305 static SecMap
* copy_for_writing ( SecMap
* dist_sm
)
308 tl_assert(dist_sm
== &sm_distinguished
[0]
309 || dist_sm
== &sm_distinguished
[1]
310 || dist_sm
== &sm_distinguished
[2]);
312 SysRes sres
= VG_(am_shadow_alloc
)(sizeof(SecMap
));
313 if (sr_isError(sres
))
314 VG_(out_of_memory_NORETURN
)( "memcheck:allocate new SecMap",
315 sizeof(SecMap
), sr_Err(sres
) );
316 new_sm
= (void *)(Addr
)sr_Res(sres
);
317 VG_(memcpy
)(new_sm
, dist_sm
, sizeof(SecMap
));
318 update_SM_counts(dist_sm
, new_sm
);
322 /* --------------- Stats --------------- */
324 static Int n_issued_SMs
= 0;
325 static Int n_deissued_SMs
= 0;
326 static Int n_noaccess_SMs
= N_PRIMARY_MAP
; // start with many noaccess DSMs
327 static Int n_undefined_SMs
= 0;
328 static Int n_defined_SMs
= 0;
329 static Int n_non_DSM_SMs
= 0;
330 static Int max_noaccess_SMs
= 0;
331 static Int max_undefined_SMs
= 0;
332 static Int max_defined_SMs
= 0;
333 static Int max_non_DSM_SMs
= 0;
335 /* # searches initiated in auxmap_L1, and # base cmps required */
336 static ULong n_auxmap_L1_searches
= 0;
337 static ULong n_auxmap_L1_cmps
= 0;
338 /* # of searches that missed in auxmap_L1 and therefore had to
339 be handed to auxmap_L2. And the number of nodes inserted. */
340 static ULong n_auxmap_L2_searches
= 0;
341 static ULong n_auxmap_L2_nodes
= 0;
343 static Int n_sanity_cheap
= 0;
344 static Int n_sanity_expensive
= 0;
346 static Int n_secVBit_nodes
= 0;
347 static Int max_secVBit_nodes
= 0;
349 static void update_SM_counts(SecMap
* oldSM
, SecMap
* newSM
)
351 if (oldSM
== &sm_distinguished
[SM_DIST_NOACCESS
]) n_noaccess_SMs
--;
352 else if (oldSM
== &sm_distinguished
[SM_DIST_UNDEFINED
]) n_undefined_SMs
--;
353 else if (oldSM
== &sm_distinguished
[SM_DIST_DEFINED
]) n_defined_SMs
--;
354 else { n_non_DSM_SMs
--;
357 if (newSM
== &sm_distinguished
[SM_DIST_NOACCESS
]) n_noaccess_SMs
++;
358 else if (newSM
== &sm_distinguished
[SM_DIST_UNDEFINED
]) n_undefined_SMs
++;
359 else if (newSM
== &sm_distinguished
[SM_DIST_DEFINED
]) n_defined_SMs
++;
360 else { n_non_DSM_SMs
++;
363 if (n_noaccess_SMs
> max_noaccess_SMs
) max_noaccess_SMs
= n_noaccess_SMs
;
364 if (n_undefined_SMs
> max_undefined_SMs
) max_undefined_SMs
= n_undefined_SMs
;
365 if (n_defined_SMs
> max_defined_SMs
) max_defined_SMs
= n_defined_SMs
;
366 if (n_non_DSM_SMs
> max_non_DSM_SMs
) max_non_DSM_SMs
= n_non_DSM_SMs
;
369 /* --------------- Primary maps --------------- */
371 /* The main primary map. This covers some initial part of the address
372 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
373 handled using the auxiliary primary map.
375 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
376 && (defined(VGP_arm_linux) \
377 || defined(VGP_x86_linux) || defined(VGP_x86_solaris) || defined(VGP_x86_freebsd))
378 /* mc_main_asm.c needs visibility on a few things declared in this file.
379 MC_MAIN_STATIC allows to define them static if ok, i.e. on
380 platforms that are not using hand-coded asm statements. */
381 #define MC_MAIN_STATIC
383 #define MC_MAIN_STATIC static
385 MC_MAIN_STATIC SecMap
* primary_map
[N_PRIMARY_MAP
];
388 /* An entry in the auxiliary primary map. base must be a 64k-aligned
389 value, and sm points at the relevant secondary map. As with the
390 main primary map, the secondary may be either a real secondary, or
391 one of the three distinguished secondaries. DO NOT CHANGE THIS
392 LAYOUT: the first word has to be the key for OSet fast lookups.
401 /* Tunable parameter: How big is the L1 queue? */
402 #define N_AUXMAP_L1 24
404 /* Tunable parameter: How far along the L1 queue to insert
405 entries resulting from L2 lookups? */
406 #define AUXMAP_L1_INSERT_IX 12
410 AuxMapEnt
* ent
; // pointer to the matching auxmap_L2 node
412 auxmap_L1
[N_AUXMAP_L1
];
414 static OSet
* auxmap_L2
= NULL
;
416 static void init_auxmap_L1_L2 ( void )
419 for (i
= 0; i
< N_AUXMAP_L1
; i
++) {
420 auxmap_L1
[i
].base
= 0;
421 auxmap_L1
[i
].ent
= NULL
;
424 tl_assert(0 == offsetof(AuxMapEnt
,base
));
425 tl_assert(sizeof(Addr
) == sizeof(void*));
426 auxmap_L2
= VG_(OSetGen_Create
)( /*keyOff*/ offsetof(AuxMapEnt
,base
),
428 VG_(malloc
), "mc.iaLL.1", VG_(free
) );
431 /* Check representation invariants; if OK return NULL; else a
432 descriptive bit of text. Also return the number of
433 non-distinguished secondary maps referred to from the auxiliary
436 static const HChar
* check_auxmap_L1_L2_sanity ( Word
* n_secmaps_found
)
439 /* On a 32-bit platform, the L2 and L1 tables should
440 both remain empty forever.
442 On a 64-bit platform:
444 all .base & 0xFFFF == 0
445 all .base > MAX_PRIMARY_ADDRESS
447 all .base & 0xFFFF == 0
448 all (.base > MAX_PRIMARY_ADDRESS
450 and .ent points to an AuxMapEnt with the same .base)
452 (.base == 0 and .ent == NULL)
454 *n_secmaps_found
= 0;
455 if (sizeof(void*) == 4) {
456 /* 32-bit platform */
457 if (VG_(OSetGen_Size
)(auxmap_L2
) != 0)
458 return "32-bit: auxmap_L2 is non-empty";
459 for (i
= 0; i
< N_AUXMAP_L1
; i
++)
460 if (auxmap_L1
[i
].base
!= 0 || auxmap_L1
[i
].ent
!= NULL
)
461 return "32-bit: auxmap_L1 is non-empty";
463 /* 64-bit platform */
464 UWord elems_seen
= 0;
465 AuxMapEnt
*elem
, *res
;
468 VG_(OSetGen_ResetIter
)(auxmap_L2
);
469 while ( (elem
= VG_(OSetGen_Next
)(auxmap_L2
)) ) {
471 if (0 != (elem
->base
& (Addr
)0xFFFF))
472 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
473 if (elem
->base
<= MAX_PRIMARY_ADDRESS
)
474 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
475 if (elem
->sm
== NULL
)
476 return "64-bit: .sm in _L2 is NULL";
477 if (!is_distinguished_sm(elem
->sm
))
478 (*n_secmaps_found
)++;
480 if (elems_seen
!= n_auxmap_L2_nodes
)
481 return "64-bit: disagreement on number of elems in _L2";
482 /* Check L1-L2 correspondence */
483 for (i
= 0; i
< N_AUXMAP_L1
; i
++) {
484 if (auxmap_L1
[i
].base
== 0 && auxmap_L1
[i
].ent
== NULL
)
486 if (0 != (auxmap_L1
[i
].base
& (Addr
)0xFFFF))
487 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
488 if (auxmap_L1
[i
].base
<= MAX_PRIMARY_ADDRESS
)
489 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
490 if (auxmap_L1
[i
].ent
== NULL
)
491 return "64-bit: .ent is NULL in auxmap_L1";
492 if (auxmap_L1
[i
].ent
->base
!= auxmap_L1
[i
].base
)
493 return "64-bit: _L1 and _L2 bases are inconsistent";
494 /* Look it up in auxmap_L2. */
495 key
.base
= auxmap_L1
[i
].base
;
497 res
= VG_(OSetGen_Lookup
)(auxmap_L2
, &key
);
499 return "64-bit: _L1 .base not found in _L2";
500 if (res
!= auxmap_L1
[i
].ent
)
501 return "64-bit: _L1 .ent disagrees with _L2 entry";
503 /* Check L1 contains no duplicates */
504 for (i
= 0; i
< N_AUXMAP_L1
; i
++) {
505 if (auxmap_L1
[i
].base
== 0)
507 for (j
= i
+1; j
< N_AUXMAP_L1
; j
++) {
508 if (auxmap_L1
[j
].base
== 0)
510 if (auxmap_L1
[j
].base
== auxmap_L1
[i
].base
)
511 return "64-bit: duplicate _L1 .base entries";
515 return NULL
; /* ok */
518 static void insert_into_auxmap_L1_at ( Word rank
, AuxMapEnt
* ent
)
522 tl_assert(rank
>= 0 && rank
< N_AUXMAP_L1
);
523 for (i
= N_AUXMAP_L1
-1; i
> rank
; i
--)
524 auxmap_L1
[i
] = auxmap_L1
[i
-1];
525 auxmap_L1
[rank
].base
= ent
->base
;
526 auxmap_L1
[rank
].ent
= ent
;
529 static INLINE AuxMapEnt
* maybe_find_in_auxmap ( Addr a
)
535 tl_assert(a
> MAX_PRIMARY_ADDRESS
);
538 /* First search the front-cache, which is a self-organising
539 list containing the most popular entries. */
541 if (LIKELY(auxmap_L1
[0].base
== a
))
542 return auxmap_L1
[0].ent
;
543 if (LIKELY(auxmap_L1
[1].base
== a
)) {
544 Addr t_base
= auxmap_L1
[0].base
;
545 AuxMapEnt
* t_ent
= auxmap_L1
[0].ent
;
546 auxmap_L1
[0].base
= auxmap_L1
[1].base
;
547 auxmap_L1
[0].ent
= auxmap_L1
[1].ent
;
548 auxmap_L1
[1].base
= t_base
;
549 auxmap_L1
[1].ent
= t_ent
;
550 return auxmap_L1
[0].ent
;
553 n_auxmap_L1_searches
++;
555 for (i
= 0; i
< N_AUXMAP_L1
; i
++) {
556 if (auxmap_L1
[i
].base
== a
) {
560 tl_assert(i
>= 0 && i
<= N_AUXMAP_L1
);
562 n_auxmap_L1_cmps
+= (ULong
)(i
+1);
564 if (i
< N_AUXMAP_L1
) {
566 Addr t_base
= auxmap_L1
[i
-1].base
;
567 AuxMapEnt
* t_ent
= auxmap_L1
[i
-1].ent
;
568 auxmap_L1
[i
-1].base
= auxmap_L1
[i
-0].base
;
569 auxmap_L1
[i
-1].ent
= auxmap_L1
[i
-0].ent
;
570 auxmap_L1
[i
-0].base
= t_base
;
571 auxmap_L1
[i
-0].ent
= t_ent
;
574 return auxmap_L1
[i
].ent
;
577 n_auxmap_L2_searches
++;
579 /* First see if we already have it. */
583 res
= VG_(OSetGen_Lookup
)(auxmap_L2
, &key
);
585 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX
, res
);
589 static AuxMapEnt
* find_or_alloc_in_auxmap ( Addr a
)
591 AuxMapEnt
*nyu
, *res
;
593 /* First see if we already have it. */
594 res
= maybe_find_in_auxmap( a
);
598 /* Ok, there's no entry in the secondary map, so we'll have
602 nyu
= (AuxMapEnt
*) VG_(OSetGen_AllocNode
)( auxmap_L2
, sizeof(AuxMapEnt
) );
604 nyu
->sm
= &sm_distinguished
[SM_DIST_NOACCESS
];
605 VG_(OSetGen_Insert
)( auxmap_L2
, nyu
);
606 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX
, nyu
);
611 /* --------------- SecMap fundamentals --------------- */
613 // In all these, 'low' means it's definitely in the main primary map,
614 // 'high' means it's definitely in the auxiliary table.
616 static INLINE UWord
get_primary_map_low_offset ( Addr a
)
618 UWord pm_off
= a
>> 16;
622 static INLINE SecMap
** get_secmap_low_ptr ( Addr a
)
624 UWord pm_off
= a
>> 16;
625 # if VG_DEBUG_MEMORY >= 1
626 tl_assert(pm_off
< N_PRIMARY_MAP
);
628 return &primary_map
[ pm_off
];
631 static INLINE SecMap
** get_secmap_high_ptr ( Addr a
)
633 AuxMapEnt
* am
= find_or_alloc_in_auxmap(a
);
637 static INLINE SecMap
** get_secmap_ptr ( Addr a
)
639 return ( a
<= MAX_PRIMARY_ADDRESS
640 ? get_secmap_low_ptr(a
)
641 : get_secmap_high_ptr(a
));
644 static INLINE SecMap
* get_secmap_for_reading_low ( Addr a
)
646 return *get_secmap_low_ptr(a
);
649 static INLINE SecMap
* get_secmap_for_reading_high ( Addr a
)
651 return *get_secmap_high_ptr(a
);
654 static INLINE SecMap
* get_secmap_for_writing_low(Addr a
)
656 SecMap
** p
= get_secmap_low_ptr(a
);
657 if (UNLIKELY(is_distinguished_sm(*p
)))
658 *p
= copy_for_writing(*p
);
662 static INLINE SecMap
* get_secmap_for_writing_high ( Addr a
)
664 SecMap
** p
= get_secmap_high_ptr(a
);
665 if (UNLIKELY(is_distinguished_sm(*p
)))
666 *p
= copy_for_writing(*p
);
670 /* Produce the secmap for 'a', either from the primary map or by
671 ensuring there is an entry for it in the aux primary map. The
672 secmap may be a distinguished one as the caller will only want to
675 static INLINE SecMap
* get_secmap_for_reading ( Addr a
)
677 return ( a
<= MAX_PRIMARY_ADDRESS
678 ? get_secmap_for_reading_low (a
)
679 : get_secmap_for_reading_high(a
) );
682 /* Produce the secmap for 'a', either from the primary map or by
683 ensuring there is an entry for it in the aux primary map. The
684 secmap may not be a distinguished one, since the caller will want
685 to be able to write it. If it is a distinguished secondary, make a
686 writable copy of it, install it, and return the copy instead. (COW
689 static INLINE SecMap
* get_secmap_for_writing ( Addr a
)
691 return ( a
<= MAX_PRIMARY_ADDRESS
692 ? get_secmap_for_writing_low (a
)
693 : get_secmap_for_writing_high(a
) );
696 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
697 allocate one if one doesn't already exist. This is used by the
700 static SecMap
* maybe_get_secmap_for ( Addr a
)
702 if (a
<= MAX_PRIMARY_ADDRESS
) {
703 return get_secmap_for_reading_low(a
);
705 AuxMapEnt
* am
= maybe_find_in_auxmap(a
);
706 return am
? am
->sm
: NULL
;
710 /* --------------- Fundamental functions --------------- */
713 void insert_vabits2_into_vabits8 ( Addr a
, UChar vabits2
, UChar
* vabits8
)
715 UInt shift
= (a
& 3) << 1; // shift by 0, 2, 4, or 6
716 *vabits8
&= ~(0x3 << shift
); // mask out the two old bits
717 *vabits8
|= (vabits2
<< shift
); // mask in the two new bits
721 void insert_vabits4_into_vabits8 ( Addr a
, UChar vabits4
, UChar
* vabits8
)
724 tl_assert(VG_IS_2_ALIGNED(a
)); // Must be 2-aligned
725 shift
= (a
& 2) << 1; // shift by 0 or 4
726 *vabits8
&= ~(0xf << shift
); // mask out the four old bits
727 *vabits8
|= (vabits4
<< shift
); // mask in the four new bits
731 UChar
extract_vabits2_from_vabits8 ( Addr a
, UChar vabits8
)
733 UInt shift
= (a
& 3) << 1; // shift by 0, 2, 4, or 6
734 vabits8
>>= shift
; // shift the two bits to the bottom
735 return 0x3 & vabits8
; // mask out the rest
739 UChar
extract_vabits4_from_vabits8 ( Addr a
, UChar vabits8
)
742 tl_assert(VG_IS_2_ALIGNED(a
)); // Must be 2-aligned
743 shift
= (a
& 2) << 1; // shift by 0 or 4
744 vabits8
>>= shift
; // shift the four bits to the bottom
745 return 0xf & vabits8
; // mask out the rest
748 // Note that these four are only used in slow cases. The fast cases do
749 // clever things like combine the auxmap check (in
750 // get_secmap_{read,writ}able) with alignment checks.
753 // Any time this function is called, if it is possible that vabits2
754 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
755 // sec-V-bits table must also be set!
757 void set_vabits2 ( Addr a
, UChar vabits2
)
759 SecMap
* sm
= get_secmap_for_writing(a
);
760 UWord sm_off
= SM_OFF(a
);
761 insert_vabits2_into_vabits8( a
, vabits2
, &(sm
->vabits8
[sm_off
]) );
765 UChar
get_vabits2 ( Addr a
)
767 SecMap
* sm
= get_secmap_for_reading(a
);
768 UWord sm_off
= SM_OFF(a
);
769 UChar vabits8
= sm
->vabits8
[sm_off
];
770 return extract_vabits2_from_vabits8(a
, vabits8
);
774 // Any time this function is called, if it is possible that any of the
775 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
776 // corresponding entry(s) in the sec-V-bits table must also be set!
778 UChar
get_vabits8_for_aligned_word32 ( Addr a
)
780 SecMap
* sm
= get_secmap_for_reading(a
);
781 UWord sm_off
= SM_OFF(a
);
782 UChar vabits8
= sm
->vabits8
[sm_off
];
787 void set_vabits8_for_aligned_word32 ( Addr a
, UChar vabits8
)
789 SecMap
* sm
= get_secmap_for_writing(a
);
790 UWord sm_off
= SM_OFF(a
);
791 sm
->vabits8
[sm_off
] = vabits8
;
795 // Forward declarations
796 static UWord
get_sec_vbits8(Addr a
);
797 static void set_sec_vbits8(Addr a
, UWord vbits8
);
799 // Returns False if there was an addressability error.
801 Bool
set_vbits8 ( Addr a
, UChar vbits8
)
804 UChar vabits2
= get_vabits2(a
);
805 if ( VA_BITS2_NOACCESS
!= vabits2
) {
806 // Addressable. Convert in-register format to in-memory format.
807 // Also remove any existing sec V bit entry for the byte if no
809 if ( V_BITS8_DEFINED
== vbits8
) { vabits2
= VA_BITS2_DEFINED
; }
810 else if ( V_BITS8_UNDEFINED
== vbits8
) { vabits2
= VA_BITS2_UNDEFINED
; }
811 else { vabits2
= VA_BITS2_PARTDEFINED
;
812 set_sec_vbits8(a
, vbits8
); }
813 set_vabits2(a
, vabits2
);
816 // Unaddressable! Do nothing -- when writing to unaddressable
817 // memory it acts as a black hole, and the V bits can never be seen
818 // again. So we don't have to write them at all.
824 // Returns False if there was an addressability error. In that case, we put
825 // all defined bits into vbits8.
827 Bool
get_vbits8 ( Addr a
, UChar
* vbits8
)
830 UChar vabits2
= get_vabits2(a
);
832 // Convert the in-memory format to in-register format.
833 if ( VA_BITS2_DEFINED
== vabits2
) { *vbits8
= V_BITS8_DEFINED
; }
834 else if ( VA_BITS2_UNDEFINED
== vabits2
) { *vbits8
= V_BITS8_UNDEFINED
; }
835 else if ( VA_BITS2_NOACCESS
== vabits2
) {
836 *vbits8
= V_BITS8_DEFINED
; // Make V bits defined!
839 tl_assert( VA_BITS2_PARTDEFINED
== vabits2
);
840 *vbits8
= get_sec_vbits8(a
);
846 /* --------------- Secondary V bit table ------------ */
// This table holds the full V bit pattern for partially-defined bytes
// (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
// memory.
852 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
853 // then overwrite the same address with a fully defined byte, the sec-V-bit
854 // node will not necessarily be removed. This is because checking for
855 // whether removal is necessary would slow down the fast paths.
857 // To avoid the stale nodes building up too much, we periodically (once the
858 // table reaches a certain size) garbage collect (GC) the table by
859 // traversing it and evicting any nodes not having PDB.
860 // If more than a certain proportion of nodes survived, we increase the
861 // table size so that GCs occur less often.
863 // This policy is designed to avoid bad table bloat in the worst case where
864 // a program creates huge numbers of stale PDBs -- we would get this bloat
865 // if we had no GC -- while handling well the case where a node becomes
866 // stale but shortly afterwards is rewritten with a PDB and so becomes
867 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
868 // remove all stale nodes as soon as possible, we just end up re-adding a
869 // lot of them in later again. The "sufficiently stale" approach avoids
870 // this. (If a program has many live PDBs, performance will just suck,
871 // there's no way around that.)
873 // Further comments, JRS 14 Feb 2012. It turns out that the policy of
874 // holding on to stale entries for 2 GCs before discarding them can lead
875 // to massive space leaks. So we're changing to an arrangement where
876 // lines are evicted as soon as they are observed to be stale during a
877 // GC. This also has a side benefit of allowing the sufficiently_stale
878 // field to be removed from the SecVBitNode struct, reducing its size by
879 // 8 bytes, which is a substantial space saving considering that the
880 // struct was previously 32 or so bytes, on a 64 bit target.
882 // In order to try and mitigate the problem that the "sufficiently stale"
883 // heuristic was designed to avoid, the table size is allowed to drift
884 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This
885 // means that nodes will exist in the table longer on average, and hopefully
886 // will be deleted and re-added less frequently.
888 // The previous scaling up mechanism (now called STEPUP) is retained:
889 // if residency exceeds 50%, the table is scaled up, although by a
890 // factor sqrt(2) rather than 2 as before. This effectively doubles the
// frequency of GCs when there are many PDBs and reduces the tendency of
892 // stale PDBs to reside for long periods in the table.
894 static OSet
* secVBitTable
;
897 static ULong sec_vbits_new_nodes
= 0;
898 static ULong sec_vbits_updates
= 0;
900 // This must be a power of two; this is checked in mc_pre_clo_init().
901 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
902 // a larger address range) they take more space but we can get multiple
903 // partially-defined bytes in one if they are close to each other, reducing
904 // the number of total nodes. In practice sometimes they are clustered (eg.
905 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
906 // row), but often not. So we choose something intermediate.
907 #define BYTES_PER_SEC_VBIT_NODE 16
909 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
910 // more than this many nodes survive a GC.
911 #define STEPUP_SURVIVOR_PROPORTION 0.5
912 #define STEPUP_GROWTH_FACTOR 1.414213562
914 // If the above heuristic doesn't apply, then we may make the table
915 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
916 // this many nodes survive a GC, _and_ the total table size does
917 // not exceed a fixed limit. The numbers are somewhat arbitrary, but
918 // work tolerably well on long Firefox runs. The scaleup ratio of 1.5%
919 // effectively although gradually reduces residency and increases time
920 // between GCs for programs with small numbers of PDBs. The 80000 limit
921 // effectively limits the table size to around 2MB for programs with
922 // small numbers of PDBs, whilst giving a reasonably long lifetime to
923 // entries, to try and reduce the costs resulting from deleting and
924 // re-adding of entries.
925 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
926 #define DRIFTUP_GROWTH_FACTOR 1.015
927 #define DRIFTUP_MAX_SIZE 80000
929 // We GC the table when it gets this many nodes in it, ie. it's effectively
930 // the table size. It can change.
931 static Int secVBitLimit
= 1000;
933 // The number of GCs done, used to age sec-V-bit nodes for eviction.
934 // Because it's unsigned, wrapping doesn't matter -- the right answer will
936 static UInt GCs_done
= 0;
941 UChar vbits8
[BYTES_PER_SEC_VBIT_NODE
];
945 static OSet
* createSecVBitTable(void)
947 OSet
* newSecVBitTable
;
948 newSecVBitTable
= VG_(OSetGen_Create_With_Pool
)
949 ( offsetof(SecVBitNode
, a
),
950 NULL
, // use fast comparisons
951 VG_(malloc
), "mc.cSVT.1 (sec VBit table)",
954 sizeof(SecVBitNode
));
955 return newSecVBitTable
;
958 static void gcSecVBitTable(void)
962 Int i
, n_nodes
= 0, n_survivors
= 0;
966 // Create the new table.
967 secVBitTable2
= createSecVBitTable();
969 // Traverse the table, moving fresh nodes into the new table.
970 VG_(OSetGen_ResetIter
)(secVBitTable
);
971 while ( (n
= VG_(OSetGen_Next
)(secVBitTable
)) ) {
972 // Keep node if any of its bytes are non-stale. Using
973 // get_vabits2() for the lookup is not very efficient, but I don't
975 for (i
= 0; i
< BYTES_PER_SEC_VBIT_NODE
; i
++) {
976 if (VA_BITS2_PARTDEFINED
== get_vabits2(n
->a
+ i
)) {
977 // Found a non-stale byte, so keep =>
978 // Insert a copy of the node into the new table.
980 VG_(OSetGen_AllocNode
)(secVBitTable2
, sizeof(SecVBitNode
));
982 VG_(OSetGen_Insert
)(secVBitTable2
, n2
);
988 // Get the before and after sizes.
989 n_nodes
= VG_(OSetGen_Size
)(secVBitTable
);
990 n_survivors
= VG_(OSetGen_Size
)(secVBitTable2
);
992 // Destroy the old table, and put the new one in its place.
993 VG_(OSetGen_Destroy
)(secVBitTable
);
994 secVBitTable
= secVBitTable2
;
996 if (VG_(clo_verbosity
) > 1 && n_nodes
!= 0) {
997 VG_(message
)(Vg_DebugMsg
, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
998 n_nodes
, n_survivors
, n_survivors
* 100.0 / n_nodes
);
1001 // Increase table size if necessary.
1002 if ((Double
)n_survivors
1003 > ((Double
)secVBitLimit
* STEPUP_SURVIVOR_PROPORTION
)) {
1004 secVBitLimit
= (Int
)((Double
)secVBitLimit
* (Double
)STEPUP_GROWTH_FACTOR
);
1005 if (VG_(clo_verbosity
) > 1)
1006 VG_(message
)(Vg_DebugMsg
,
1007 "memcheck GC: %d new table size (stepup)\n",
1011 if (secVBitLimit
< DRIFTUP_MAX_SIZE
1012 && (Double
)n_survivors
1013 > ((Double
)secVBitLimit
* DRIFTUP_SURVIVOR_PROPORTION
)) {
1014 secVBitLimit
= (Int
)((Double
)secVBitLimit
* (Double
)DRIFTUP_GROWTH_FACTOR
);
1015 if (VG_(clo_verbosity
) > 1)
1016 VG_(message
)(Vg_DebugMsg
,
1017 "memcheck GC: %d new table size (driftup)\n",
1022 static UWord
get_sec_vbits8(Addr a
)
1024 Addr aAligned
= VG_ROUNDDN(a
, BYTES_PER_SEC_VBIT_NODE
);
1025 Int amod
= a
% BYTES_PER_SEC_VBIT_NODE
;
1026 SecVBitNode
* n
= VG_(OSetGen_Lookup
)(secVBitTable
, &aAligned
);
1028 tl_assert2(n
, "get_sec_vbits8: no node for address %p (%p)\n", aAligned
, a
);
1029 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1030 // make it to the secondary V bits table.
1031 vbits8
= n
->vbits8
[amod
];
1032 tl_assert(V_BITS8_DEFINED
!= vbits8
&& V_BITS8_UNDEFINED
!= vbits8
);
1036 static void set_sec_vbits8(Addr a
, UWord vbits8
)
1038 Addr aAligned
= VG_ROUNDDN(a
, BYTES_PER_SEC_VBIT_NODE
);
1039 Int i
, amod
= a
% BYTES_PER_SEC_VBIT_NODE
;
1040 SecVBitNode
* n
= VG_(OSetGen_Lookup
)(secVBitTable
, &aAligned
);
1041 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1042 // make it to the secondary V bits table.
1043 tl_assert(V_BITS8_DEFINED
!= vbits8
&& V_BITS8_UNDEFINED
!= vbits8
);
1045 n
->vbits8
[amod
] = vbits8
; // update
1046 sec_vbits_updates
++;
1048 // Do a table GC if necessary. Nb: do this before creating and
1049 // inserting the new node, to avoid erroneously GC'ing the new node.
1050 if (secVBitLimit
== VG_(OSetGen_Size
)(secVBitTable
)) {
1054 // New node: assign the specific byte, make the rest invalid (they
1055 // should never be read as-is, but be cautious).
1056 n
= VG_(OSetGen_AllocNode
)(secVBitTable
, sizeof(SecVBitNode
));
1058 for (i
= 0; i
< BYTES_PER_SEC_VBIT_NODE
; i
++) {
1059 n
->vbits8
[i
] = V_BITS8_UNDEFINED
;
1061 n
->vbits8
[amod
] = vbits8
;
1063 // Insert the new node.
1064 VG_(OSetGen_Insert
)(secVBitTable
, n
);
1065 sec_vbits_new_nodes
++;
1067 n_secVBit_nodes
= VG_(OSetGen_Size
)(secVBitTable
);
1068 if (n_secVBit_nodes
> max_secVBit_nodes
)
1069 max_secVBit_nodes
= n_secVBit_nodes
;
1073 /* --------------- Endianness helpers --------------- */
/* Returns the offset in memory of the byteno-th least significant byte
   (byteno == 0 being the least significant one) in a wordszB-sized word,
   given the specified endianness. */
1077 static INLINE UWord
byte_offset_w ( UWord wordszB
, Bool bigendian
,
1079 return bigendian
? (wordszB
-1-byteno
) : byteno
;
1083 /* --------------- Ignored address ranges --------------- */
1085 /* Denotes the address-error-reportability status for address ranges:
1086 IAR_NotIgnored: the usual case -- report errors in this range
1087 IAR_CommandLine: don't report errors -- from command line setting
1088 IAR_ClientReq: don't report errors -- from client request
1091 enum { IAR_INVALID
=99,
1097 static const HChar
* showIARKind ( IARKind iark
)
1100 case IAR_INVALID
: return "INVALID";
1101 case IAR_NotIgnored
: return "NotIgnored";
1102 case IAR_CommandLine
: return "CommandLine";
1103 case IAR_ClientReq
: return "ClientReq";
1104 default: return "???";
1108 // RangeMap<IARKind>
1109 static RangeMap
* gIgnoredAddressRanges
= NULL
;
1111 static void init_gIgnoredAddressRanges ( void )
1113 if (LIKELY(gIgnoredAddressRanges
!= NULL
))
1115 gIgnoredAddressRanges
= VG_(newRangeMap
)( VG_(malloc
), "mc.igIAR.1",
1116 VG_(free
), IAR_NotIgnored
);
1119 Bool
MC_(in_ignored_range
) ( Addr a
)
1121 if (LIKELY(gIgnoredAddressRanges
== NULL
))
1123 UWord how
= IAR_INVALID
;
1124 UWord key_min
= ~(UWord
)0;
1125 UWord key_max
= (UWord
)0;
1126 VG_(lookupRangeMap
)(&key_min
, &key_max
, &how
, gIgnoredAddressRanges
, a
);
1127 tl_assert(key_min
<= a
&& a
<= key_max
);
1129 case IAR_NotIgnored
: return False
;
1130 case IAR_CommandLine
: return True
;
1131 case IAR_ClientReq
: return True
;
1132 default: break; /* invalid */
1134 VG_(tool_panic
)("MC_(in_ignore_range)");
1138 Bool
MC_(in_ignored_range_below_sp
) ( Addr sp
, Addr a
, UInt szB
)
1140 if (LIKELY(!MC_(clo_ignore_range_below_sp
)))
1142 tl_assert(szB
>= 1 && szB
<= 32);
1143 tl_assert(MC_(clo_ignore_range_below_sp__first_offset
)
1144 > MC_(clo_ignore_range_below_sp__last_offset
));
1145 Addr range_lo
= sp
- MC_(clo_ignore_range_below_sp__first_offset
);
1146 Addr range_hi
= sp
- MC_(clo_ignore_range_below_sp__last_offset
);
1147 if (range_lo
>= range_hi
) {
1148 /* Bizarre. We have a wraparound situation. What should we do? */
1149 return False
; // Play safe
1151 /* This is the expected case. */
1152 if (range_lo
<= a
&& a
+ szB
- 1 <= range_hi
)
1161 /* Parse two Addrs (in hex) separated by a dash, or fail. */
1163 static Bool
parse_Addr_pair ( const HChar
** ppc
, Addr
* result1
, Addr
* result2
)
1165 Bool ok
= VG_(parse_Addr
) (ppc
, result1
);
1171 ok
= VG_(parse_Addr
) (ppc
, result2
);
1177 /* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
1180 static Bool
parse_UInt_pair ( const HChar
** ppc
, UInt
* result1
, UInt
* result2
)
1182 Bool ok
= VG_(parse_UInt
) (ppc
, result1
);
1188 ok
= VG_(parse_UInt
) (ppc
, result2
);
1194 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1195 fail. If they are valid, add them to the global set of ignored
1197 static Bool
parse_ignore_ranges ( const HChar
* str0
)
1199 init_gIgnoredAddressRanges();
1200 const HChar
* str
= str0
;
1201 const HChar
** ppc
= &str
;
1203 Addr start
= ~(Addr
)0;
1205 Bool ok
= parse_Addr_pair(ppc
, &start
, &end
);
1210 VG_(bindRangeMap
)( gIgnoredAddressRanges
, start
, end
, IAR_CommandLine
);
1221 /* Add or remove [start, +len) from the set of ignored ranges. */
1222 static Bool
modify_ignore_ranges ( Bool addRange
, Addr start
, Addr len
)
1224 init_gIgnoredAddressRanges();
1225 const Bool verbose
= (VG_(clo_verbosity
) > 1);
1230 VG_(bindRangeMap
)(gIgnoredAddressRanges
,
1231 start
, start
+len
-1, IAR_ClientReq
);
1233 VG_(dmsg
)("memcheck: modify_ignore_ranges: add %p %p\n",
1234 (void*)start
, (void*)(start
+len
-1));
1236 VG_(bindRangeMap
)(gIgnoredAddressRanges
,
1237 start
, start
+len
-1, IAR_NotIgnored
);
1239 VG_(dmsg
)("memcheck: modify_ignore_ranges: del %p %p\n",
1240 (void*)start
, (void*)(start
+len
-1));
1243 VG_(dmsg
)("memcheck: now have %u ranges:\n",
1244 VG_(sizeRangeMap
)(gIgnoredAddressRanges
));
1246 for (i
= 0; i
< VG_(sizeRangeMap
)(gIgnoredAddressRanges
); i
++) {
1247 UWord val
= IAR_INVALID
;
1248 UWord key_min
= ~(UWord
)0;
1249 UWord key_max
= (UWord
)0;
1250 VG_(indexRangeMap
)( &key_min
, &key_max
, &val
,
1251 gIgnoredAddressRanges
, i
);
1252 VG_(dmsg
)("memcheck: [%u] %016lx-%016lx %s\n",
1253 i
, key_min
, key_max
, showIARKind(val
));
1260 /* --------------- Load/store slow cases. --------------- */
1263 __attribute__((noinline
))
1264 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong
* res
,
1265 Addr a
, SizeT nBits
, Bool bigendian
)
1267 ULong pessim
[4]; /* only used when p-l-ok=yes */
1268 SSizeT szB
= nBits
/ 8;
1269 SSizeT szL
= szB
/ 8; /* Size in Longs (64-bit units) */
1270 SSizeT i
, j
; /* Must be signed. */
1271 SizeT n_addrs_bad
= 0;
1276 /* Code below assumes load size is a power of two and at least 64
1278 tl_assert((szB
& (szB
-1)) == 0 && szL
> 0);
1280 /* If this triggers, you probably just need to increase the size of
1281 the pessim array. */
1282 tl_assert(szL
<= sizeof(pessim
) / sizeof(pessim
[0]));
1284 for (j
= 0; j
< szL
; j
++) {
1285 pessim
[j
] = V_BITS64_DEFINED
;
1286 res
[j
] = V_BITS64_UNDEFINED
;
1289 /* Make up a result V word, which contains the loaded data for
1290 valid addresses and Defined for invalid addresses. Iterate over
1291 the bytes in the word, from the most significant down to the
1292 least. The vbits to return are calculated into vbits128. Also
1293 compute the pessimising value to be used when
1294 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1295 info can be gleaned from the pessim array) but is used as a
1297 for (j
= szL
-1; j
>= 0; j
--) {
1298 ULong vbits64
= V_BITS64_UNDEFINED
;
1299 ULong pessim64
= V_BITS64_DEFINED
;
1300 UWord long_index
= byte_offset_w(szL
, bigendian
, j
);
1301 for (i
= 8-1; i
>= 0; i
--) {
1302 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP
);
1303 ai
= a
+ 8*long_index
+ byte_offset_w(8, bigendian
, i
);
1304 ok
= get_vbits8(ai
, &vbits8
);
1307 if (!ok
) n_addrs_bad
++;
1309 pessim64
|= (ok
? V_BITS8_DEFINED
: V_BITS8_UNDEFINED
);
1311 res
[long_index
] = vbits64
;
1312 pessim
[long_index
] = pessim64
;
1315 /* In the common case, all the addresses involved are valid, so we
1316 just return the computed V bits and have done. */
1317 if (LIKELY(n_addrs_bad
== 0))
1320 /* If there's no possibility of getting a partial-loads-ok
1321 exemption, report the error and quit. */
1322 if (!MC_(clo_partial_loads_ok
)) {
1323 MC_(record_address_error
)( VG_(get_running_tid
)(), a
, szB
, False
);
/* The partial-loads-ok exemption might apply.  Find out if it
1328 does. If so, don't report an addressing error, but do return
1329 Undefined for the bytes that are out of range, so as to avoid
1330 false negatives. If it doesn't apply, just report an addressing
1331 error in the usual way. */
1333 /* Some code steps along byte strings in aligned chunks
1334 even when there is only a partially defined word at the end (eg,
1335 optimised strlen). This is allowed by the memory model of
1336 modern machines, since an aligned load cannot span two pages and
1337 thus cannot "partially fault".
1339 Therefore, a load from a partially-addressible place is allowed
1340 if all of the following hold:
1341 - the command-line flag is set [by default, it isn't]
1342 - it's an aligned load
1343 - at least one of the addresses in the word *is* valid
1345 Since this suppresses the addressing error, we avoid false
1346 negatives by marking bytes undefined when they come from an
1350 /* "at least one of the addresses is invalid" */
1352 for (j
= 0; j
< szL
; j
++)
1353 ok
|= pessim
[j
] != V_BITS64_DEFINED
;
1356 # if defined(VGP_s390x_linux)
1357 tl_assert(szB
== 16); // s390 doesn't have > 128 bit SIMD
1358 /* OK if all loaded bytes are from the same page. */
1359 Bool alignedOK
= ((a
& 0xfff) <= 0x1000 - szB
);
1360 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
1361 /* lxvd2x might generate an unaligned 128 bit vector load. */
1362 Bool alignedOK
= (szB
== 16);
1364 /* OK if the address is aligned by the load size. */
1365 Bool alignedOK
= (0 == (a
& (szB
- 1)));
1368 if (alignedOK
&& n_addrs_bad
< szB
) {
1369 /* Exemption applies. Use the previously computed pessimising
1370 value and return the combined result, but don't flag an
1371 addressing error. The pessimising value is Defined for valid
1372 addresses and Undefined for invalid addresses. */
1373 /* for assumption that doing bitwise or implements UifU */
1374 tl_assert(V_BIT_UNDEFINED
== 1 && V_BIT_DEFINED
== 0);
1375 /* (really need "UifU" here...)
1376 vbits[j] UifU= pessim[j] (is pessimised by it, iow) */
1377 for (j
= szL
-1; j
>= 0; j
--)
1378 res
[j
] |= pessim
[j
];
1382 /* Exemption doesn't apply. Flag an addressing error in the normal
1384 MC_(record_address_error
)( VG_(get_running_tid
)(), a
, szB
, False
);
1388 __attribute__((noinline
))
1389 __attribute__((used
))
1391 ULong
mc_LOADVn_slow ( Addr a
, SizeT nBits
, Bool bigendian
);
1394 __attribute__((noinline
))
1395 __attribute__((used
))
1396 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
1397 this function may get called from hand written assembly. */
1398 ULong
mc_LOADVn_slow ( Addr a
, SizeT nBits
, Bool bigendian
)
1400 PROF_EVENT(MCPE_LOADVN_SLOW
);
1402 /* ------------ BEGIN semi-fast cases ------------ */
1403 /* These deal quickly-ish with the common auxiliary primary map
1404 cases on 64-bit platforms. Are merely a speedup hack; can be
1405 omitted without loss of correctness/functionality. Note that in
1406 both cases the "sizeof(void*) == 8" causes these cases to be
1407 folded out by compilers on 32-bit platforms. These are derived
1408 from LOADV64 and LOADV32.
1411 # if defined(VGA_mips64) && defined(VGABI_N32)
1412 if (LIKELY(sizeof(void*) == 4 && nBits
== 64 && VG_IS_8_ALIGNED(a
)))
1414 if (LIKELY(sizeof(void*) == 8 && nBits
== 64 && VG_IS_8_ALIGNED(a
)))
1417 SecMap
* sm
= get_secmap_for_reading(a
);
1418 UWord sm_off16
= SM_OFF_16(a
);
1419 UWord vabits16
= sm
->vabits16
[sm_off16
];
1420 if (LIKELY(vabits16
== VA_BITS16_DEFINED
))
1421 return V_BITS64_DEFINED
;
1422 if (LIKELY(vabits16
== VA_BITS16_UNDEFINED
))
1423 return V_BITS64_UNDEFINED
;
1424 /* else fall into the slow case */
1427 # if defined(VGA_mips64) && defined(VGABI_N32)
1428 if (LIKELY(sizeof(void*) == 4 && nBits
== 32 && VG_IS_4_ALIGNED(a
)))
1430 if (LIKELY(sizeof(void*) == 8 && nBits
== 32 && VG_IS_4_ALIGNED(a
)))
1433 SecMap
* sm
= get_secmap_for_reading(a
);
1434 UWord sm_off
= SM_OFF(a
);
1435 UWord vabits8
= sm
->vabits8
[sm_off
];
1436 if (LIKELY(vabits8
== VA_BITS8_DEFINED
))
1437 return ((UWord
)0xFFFFFFFF00000000ULL
| (UWord
)V_BITS32_DEFINED
);
1438 if (LIKELY(vabits8
== VA_BITS8_UNDEFINED
))
1439 return ((UWord
)0xFFFFFFFF00000000ULL
| (UWord
)V_BITS32_UNDEFINED
);
1440 /* else fall into slow case */
1443 /* ------------ END semi-fast cases ------------ */
1445 ULong vbits64
= V_BITS64_UNDEFINED
; /* result */
1446 ULong pessim64
= V_BITS64_DEFINED
; /* only used when p-l-ok=yes */
1447 SSizeT szB
= nBits
/ 8;
1448 SSizeT i
; /* Must be signed. */
1449 SizeT n_addrs_bad
= 0;
1454 tl_assert(nBits
== 64 || nBits
== 32 || nBits
== 16 || nBits
== 8);
1456 /* Make up a 64-bit result V word, which contains the loaded data
1457 for valid addresses and Defined for invalid addresses. Iterate
1458 over the bytes in the word, from the most significant down to
1459 the least. The vbits to return are calculated into vbits64.
1460 Also compute the pessimising value to be used when
1461 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1462 info can be gleaned from pessim64) but is used as a
1464 for (i
= szB
-1; i
>= 0; i
--) {
1465 PROF_EVENT(MCPE_LOADVN_SLOW_LOOP
);
1466 ai
= a
+ byte_offset_w(szB
, bigendian
, i
);
1467 ok
= get_vbits8(ai
, &vbits8
);
1470 if (!ok
) n_addrs_bad
++;
1472 pessim64
|= (ok
? V_BITS8_DEFINED
: V_BITS8_UNDEFINED
);
1475 /* In the common case, all the addresses involved are valid, so we
1476 just return the computed V bits and have done. */
1477 if (LIKELY(n_addrs_bad
== 0))
1480 /* If there's no possibility of getting a partial-loads-ok
1481 exemption, report the error and quit. */
1482 if (!MC_(clo_partial_loads_ok
)) {
1483 MC_(record_address_error
)( VG_(get_running_tid
)(), a
, szB
, False
);
/* The partial-loads-ok exemption might apply.  Find out if it
1488 does. If so, don't report an addressing error, but do return
1489 Undefined for the bytes that are out of range, so as to avoid
1490 false negatives. If it doesn't apply, just report an addressing
1491 error in the usual way. */
1493 /* Some code steps along byte strings in aligned word-sized chunks
1494 even when there is only a partially defined word at the end (eg,
1495 optimised strlen). This is allowed by the memory model of
1496 modern machines, since an aligned load cannot span two pages and
1497 thus cannot "partially fault". Despite such behaviour being
1498 declared undefined by ANSI C/C++.
1500 Therefore, a load from a partially-addressible place is allowed
1501 if all of the following hold:
1502 - the command-line flag is set [by default, it isn't]
1503 - it's a word-sized, word-aligned load
1504 - at least one of the addresses in the word *is* valid
1506 Since this suppresses the addressing error, we avoid false
1507 negatives by marking bytes undefined when they come from an
1511 /* "at least one of the addresses is invalid" */
1512 tl_assert(pessim64
!= V_BITS64_DEFINED
);
1514 # if defined(VGA_mips64) && defined(VGABI_N32)
1515 if (szB
== VG_WORDSIZE
* 2 && VG_IS_WORD_ALIGNED(a
)
1516 && n_addrs_bad
< VG_WORDSIZE
* 2)
1517 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
1518 /* On power unaligned loads of words are OK. */
1519 if (szB
== VG_WORDSIZE
&& n_addrs_bad
< VG_WORDSIZE
)
1521 if (szB
== VG_WORDSIZE
&& VG_IS_WORD_ALIGNED(a
)
1522 && n_addrs_bad
< VG_WORDSIZE
)
1525 /* Exemption applies. Use the previously computed pessimising
1526 value for vbits64 and return the combined result, but don't
1527 flag an addressing error. The pessimising value is Defined
1528 for valid addresses and Undefined for invalid addresses. */
1529 /* for assumption that doing bitwise or implements UifU */
1530 tl_assert(V_BIT_UNDEFINED
== 1 && V_BIT_DEFINED
== 0);
1531 /* (really need "UifU" here...)
1532 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1533 vbits64
|= pessim64
;
/* Also, it appears that gcc generates string-stepping code in
1538 32-bit chunks on 64 bit platforms. So, also grant an exception
1539 for this case. Note that the first clause of the conditional
1540 (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1541 will get folded out in 32 bit builds. */
1542 # if defined(VGA_mips64) && defined(VGABI_N32)
1543 if (VG_WORDSIZE
== 4
1544 && VG_IS_4_ALIGNED(a
) && nBits
== 32 && n_addrs_bad
< 4)
1546 if (VG_WORDSIZE
== 8
1547 && VG_IS_4_ALIGNED(a
) && nBits
== 32 && n_addrs_bad
< 4)
1550 tl_assert(V_BIT_UNDEFINED
== 1 && V_BIT_DEFINED
== 0);
1551 /* (really need "UifU" here...)
1552 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1553 vbits64
|= pessim64
;
1554 /* Mark the upper 32 bits as undefined, just to be on the safe
1556 vbits64
|= (((ULong
)V_BITS32_UNDEFINED
) << 32);
1560 /* Exemption doesn't apply. Flag an addressing error in the normal
1562 MC_(record_address_error
)( VG_(get_running_tid
)(), a
, szB
, False
);
1569 __attribute__((noinline
))
1570 void mc_STOREVn_slow ( Addr a
, SizeT nBits
, ULong vbytes
, Bool bigendian
)
1572 SizeT szB
= nBits
/ 8;
1573 SizeT i
, n_addrs_bad
= 0;
1578 PROF_EVENT(MCPE_STOREVN_SLOW
);
1580 /* ------------ BEGIN semi-fast cases ------------ */
1581 /* These deal quickly-ish with the common auxiliary primary map
1582 cases on 64-bit platforms. Are merely a speedup hack; can be
1583 omitted without loss of correctness/functionality. Note that in
1584 both cases the "sizeof(void*) == 8" causes these cases to be
1585 folded out by compilers on 32-bit platforms. The logic below
1586 is somewhat similar to some cases extensively commented in
1587 MC_(helperc_STOREV8).
1589 # if defined(VGA_mips64) && defined(VGABI_N32)
1590 if (LIKELY(sizeof(void*) == 4 && nBits
== 64 && VG_IS_8_ALIGNED(a
)))
1592 if (LIKELY(sizeof(void*) == 8 && nBits
== 64 && VG_IS_8_ALIGNED(a
)))
1595 SecMap
* sm
= get_secmap_for_reading(a
);
1596 UWord sm_off16
= SM_OFF_16(a
);
1597 UWord vabits16
= sm
->vabits16
[sm_off16
];
1598 if (LIKELY( !is_distinguished_sm(sm
) &&
1599 (VA_BITS16_DEFINED
== vabits16
||
1600 VA_BITS16_UNDEFINED
== vabits16
) )) {
1601 /* Handle common case quickly: a is suitably aligned, */
1602 /* is mapped, and is addressible. */
1603 // Convert full V-bits in register to compact 2-bit form.
1604 if (LIKELY(V_BITS64_DEFINED
== vbytes
)) {
1605 sm
->vabits16
[sm_off16
] = VA_BITS16_DEFINED
;
1607 } else if (V_BITS64_UNDEFINED
== vbytes
) {
1608 sm
->vabits16
[sm_off16
] = VA_BITS16_UNDEFINED
;
1611 /* else fall into the slow case */
1613 /* else fall into the slow case */
1616 # if defined(VGA_mips64) && defined(VGABI_N32)
1617 if (LIKELY(sizeof(void*) == 4 && nBits
== 32 && VG_IS_4_ALIGNED(a
)))
1619 if (LIKELY(sizeof(void*) == 8 && nBits
== 32 && VG_IS_4_ALIGNED(a
)))
1622 SecMap
* sm
= get_secmap_for_reading(a
);
1623 UWord sm_off
= SM_OFF(a
);
1624 UWord vabits8
= sm
->vabits8
[sm_off
];
1625 if (LIKELY( !is_distinguished_sm(sm
) &&
1626 (VA_BITS8_DEFINED
== vabits8
||
1627 VA_BITS8_UNDEFINED
== vabits8
) )) {
1628 /* Handle common case quickly: a is suitably aligned, */
1629 /* is mapped, and is addressible. */
1630 // Convert full V-bits in register to compact 2-bit form.
1631 if (LIKELY(V_BITS32_DEFINED
== (vbytes
& 0xFFFFFFFF))) {
1632 sm
->vabits8
[sm_off
] = VA_BITS8_DEFINED
;
1634 } else if (V_BITS32_UNDEFINED
== (vbytes
& 0xFFFFFFFF)) {
1635 sm
->vabits8
[sm_off
] = VA_BITS8_UNDEFINED
;
1638 /* else fall into the slow case */
1640 /* else fall into the slow case */
1642 /* ------------ END semi-fast cases ------------ */
1644 tl_assert(nBits
== 64 || nBits
== 32 || nBits
== 16 || nBits
== 8);
1646 /* Dump vbytes in memory, iterating from least to most significant
1647 byte. At the same time establish addressibility of the location. */
1648 for (i
= 0; i
< szB
; i
++) {
1649 PROF_EVENT(MCPE_STOREVN_SLOW_LOOP
);
1650 ai
= a
+ byte_offset_w(szB
, bigendian
, i
);
1651 vbits8
= vbytes
& 0xff;
1652 ok
= set_vbits8(ai
, vbits8
);
1653 if (!ok
) n_addrs_bad
++;
1657 /* If an address error has happened, report it. */
1658 if (n_addrs_bad
> 0)
1659 MC_(record_address_error
)( VG_(get_running_tid
)(), a
, szB
, True
);
1663 /*------------------------------------------------------------*/
1664 /*--- Setting permissions over address ranges. ---*/
1665 /*------------------------------------------------------------*/
1667 static void set_address_range_perms ( Addr a
, SizeT lenT
, UWord vabits16
,
1670 UWord sm_off
, sm_off16
;
1671 UWord vabits2
= vabits16
& 0x3;
1672 SizeT lenA
, lenB
, len_to_next_secmap
;
1676 SecMap
* example_dsm
;
1678 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS
);
1680 /* Check the V+A bits make sense. */
1681 tl_assert(VA_BITS16_NOACCESS
== vabits16
||
1682 VA_BITS16_UNDEFINED
== vabits16
||
1683 VA_BITS16_DEFINED
== vabits16
);
1685 // This code should never write PDBs; ensure this. (See comment above
1687 tl_assert(VA_BITS2_PARTDEFINED
!= vabits2
);
1692 if (lenT
> 256 * 1024 * 1024) {
1693 if (VG_(clo_verbosity
) > 0 && !VG_(clo_xml
)) {
1694 const HChar
* s
= "unknown???";
1695 if (vabits16
== VA_BITS16_NOACCESS
) s
= "noaccess";
1696 if (vabits16
== VA_BITS16_UNDEFINED
) s
= "undefined";
1697 if (vabits16
== VA_BITS16_DEFINED
) s
= "defined";
1698 VG_(message
)(Vg_UserMsg
, "Warning: set address range perms: "
1699 "large range [0x%lx, 0x%lx) (%s)\n",
1704 #ifndef PERF_FAST_SARP
1705 /*------------------ debug-only case ------------------ */
1707 // Endianness doesn't matter here because all bytes are being set to
1709 // Nb: We don't have to worry about updating the sec-V-bits table
1710 // after these set_vabits2() calls because this code never writes
1711 // VA_BITS2_PARTDEFINED values.
1713 for (i
= 0; i
< lenT
; i
++) {
1714 set_vabits2(a
+ i
, vabits2
);
1720 /*------------------ standard handling ------------------ */
1722 /* Get the distinguished secondary that we might want
1723 to use (part of the space-compression scheme). */
1724 example_dsm
= &sm_distinguished
[dsm_num
];
1726 // We have to handle ranges covering various combinations of partial and
1727 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1728 // Cases marked with a '*' are common.
1732 // * one partial sec-map (p) 1
1733 // - one whole sec-map (P) 2
1735 // * two partial sec-maps (pp) 1,3
1736 // - one partial, one whole sec-map (pP) 1,2
1737 // - one whole, one partial sec-map (Pp) 2,3
1738 // - two whole sec-maps (PP) 2,2
1740 // * one partial, one whole, one partial (pPp) 1,2,3
1741 // - one partial, two whole (pPP) 1,2,2
1742 // - two whole, one partial (PPp) 2,2,3
1743 // - three whole (PPP) 2,2,2
1745 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1746 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1747 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
// - N whole (PP...PP) 2,2...2,2
1750 // Break up total length (lenT) into two parts: length in the first
1751 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
1752 aNext
= start_of_this_sm(a
) + SM_SIZE
;
1753 len_to_next_secmap
= aNext
- a
;
1754 if ( lenT
<= len_to_next_secmap
) {
1755 // Range entirely within one sec-map. Covers almost all cases.
1756 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP
);
1759 } else if (is_start_of_sm(a
)) {
1760 // Range spans at least one whole sec-map, and starts at the beginning
1761 // of a sec-map; skip to Part 2.
1762 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP
);
1767 // Range spans two or more sec-maps, first one is partial.
1768 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS
);
1769 lenA
= len_to_next_secmap
;
1773 //------------------------------------------------------------------------
1774 // Part 1: Deal with the first sec_map. Most of the time the range will be
1775 // entirely within a sec_map and this part alone will suffice. Also,
1776 // doing it this way lets us avoid repeatedly testing for the crossing of
1777 // a sec-map boundary within these loops.
1778 //------------------------------------------------------------------------
1780 // If it's distinguished, make it undistinguished if necessary.
1781 sm_ptr
= get_secmap_ptr(a
);
1782 if (is_distinguished_sm(*sm_ptr
)) {
1783 if (*sm_ptr
== example_dsm
) {
1784 // Sec-map already has the V+A bits that we want, so skip.
1785 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK
);
1789 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1
);
1790 *sm_ptr
= copy_for_writing(*sm_ptr
);
1797 if (VG_IS_8_ALIGNED(a
)) break;
1798 if (lenA
< 1) break;
1799 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A
);
1801 insert_vabits2_into_vabits8( a
, vabits2
, &(sm
->vabits8
[sm_off
]) );
1805 // 8-aligned, 8 byte steps
1807 if (lenA
< 8) break;
1808 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A
);
1809 sm_off16
= SM_OFF_16(a
);
1810 sm
->vabits16
[sm_off16
] = vabits16
;
1816 if (lenA
< 1) break;
1817 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B
);
1819 insert_vabits2_into_vabits8( a
, vabits2
, &(sm
->vabits8
[sm_off
]) );
1824 // We've finished the first sec-map. Is that it?
1828 //------------------------------------------------------------------------
1829 // Part 2: Fast-set entire sec-maps at a time.
1830 //------------------------------------------------------------------------
1832 // 64KB-aligned, 64KB steps.
1833 // Nb: we can reach here with lenB < SM_SIZE
1834 tl_assert(0 == lenA
);
1836 if (lenB
< SM_SIZE
) break;
1837 tl_assert(is_start_of_sm(a
));
1838 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K
);
1839 sm_ptr
= get_secmap_ptr(a
);
1840 if (!is_distinguished_sm(*sm_ptr
)) {
1841 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM
);
1842 // Free the non-distinguished sec-map that we're replacing. This
1843 // case happens moderately often, enough to be worthwhile.
1844 SysRes sres
= VG_(am_munmap_valgrind
)((Addr
)*sm_ptr
, sizeof(SecMap
));
1845 tl_assert2(! sr_isError(sres
), "SecMap valgrind munmap failure\n");
1847 update_SM_counts(*sm_ptr
, example_dsm
);
1848 // Make the sec-map entry point to the example DSM
1849 *sm_ptr
= example_dsm
;
1854 // We've finished the whole sec-maps. Is that it?
1858 //------------------------------------------------------------------------
1859 // Part 3: Finish off the final partial sec-map, if necessary.
1860 //------------------------------------------------------------------------
1862 tl_assert(is_start_of_sm(a
) && lenB
< SM_SIZE
);
1864 // If it's distinguished, make it undistinguished if necessary.
1865 sm_ptr
= get_secmap_ptr(a
);
1866 if (is_distinguished_sm(*sm_ptr
)) {
1867 if (*sm_ptr
== example_dsm
) {
1868 // Sec-map already has the V+A bits that we want, so stop.
1869 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK
);
1872 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2
);
1873 *sm_ptr
= copy_for_writing(*sm_ptr
);
1878 // 8-aligned, 8 byte steps
1880 if (lenB
< 8) break;
1881 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B
);
1882 sm_off16
= SM_OFF_16(a
);
1883 sm
->vabits16
[sm_off16
] = vabits16
;
1889 if (lenB
< 1) return;
1890 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C
);
1892 insert_vabits2_into_vabits8( a
, vabits2
, &(sm
->vabits8
[sm_off
]) );
1899 /* --- Set permissions for arbitrary address ranges --- */
1901 void MC_(make_mem_noaccess
) ( Addr a
, SizeT len
)
1903 PROF_EVENT(MCPE_MAKE_MEM_NOACCESS
);
1904 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a
, len
);
1905 set_address_range_perms ( a
, len
, VA_BITS16_NOACCESS
, SM_DIST_NOACCESS
);
1906 if (UNLIKELY( MC_(clo_mc_level
) == 3 ))
1907 ocache_sarp_Clear_Origins ( a
, len
);
1910 static void make_mem_undefined ( Addr a
, SizeT len
)
1912 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED
);
1913 DEBUG("make_mem_undefined(%p, %lu)\n", a
, len
);
1914 set_address_range_perms ( a
, len
, VA_BITS16_UNDEFINED
, SM_DIST_UNDEFINED
);
1917 void MC_(make_mem_undefined_w_otag
) ( Addr a
, SizeT len
, UInt otag
)
1919 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG
);
1920 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a
, len
);
1921 set_address_range_perms ( a
, len
, VA_BITS16_UNDEFINED
, SM_DIST_UNDEFINED
);
1922 if (UNLIKELY( MC_(clo_mc_level
) == 3 ))
1923 ocache_sarp_Set_Origins ( a
, len
, otag
);
1927 void make_mem_undefined_w_tid_and_okind ( Addr a
, SizeT len
,
1928 ThreadId tid
, UInt okind
)
1932 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1933 if it is invalid. So no need to do it here. */
1934 tl_assert(okind
<= 3);
1935 here
= VG_(record_ExeContext
)( tid
, 0/*first_ip_delta*/ );
1937 ecu
= VG_(get_ECU_from_ExeContext
)(here
);
1938 tl_assert(VG_(is_plausible_ECU
)(ecu
));
1939 MC_(make_mem_undefined_w_otag
) ( a
, len
, ecu
| okind
);
1943 void mc_new_mem_w_tid_make_ECU ( Addr a
, SizeT len
, ThreadId tid
)
1945 make_mem_undefined_w_tid_and_okind ( a
, len
, tid
, MC_OKIND_UNKNOWN
);
1949 void mc_new_mem_w_tid_no_ECU ( Addr a
, SizeT len
, ThreadId tid
)
1951 MC_(make_mem_undefined_w_otag
) ( a
, len
, MC_OKIND_UNKNOWN
);
1954 void MC_(make_mem_defined
) ( Addr a
, SizeT len
)
1956 PROF_EVENT(MCPE_MAKE_MEM_DEFINED
);
1957 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a
, len
);
1958 set_address_range_perms ( a
, len
, VA_BITS16_DEFINED
, SM_DIST_DEFINED
);
1959 if (UNLIKELY( MC_(clo_mc_level
) == 3 ))
1960 ocache_sarp_Clear_Origins ( a
, len
);
1963 __attribute__((unused
))
1964 static void make_mem_defined_w_tid ( Addr a
, SizeT len
, ThreadId tid
)
1966 MC_(make_mem_defined
)(a
, len
);
1969 /* For each byte in [a,a+len), if the byte is addressable, make it be
1970 defined, but if it isn't addressible, leave it alone. In other
1971 words a version of MC_(make_mem_defined) that doesn't mess with
1972 addressibility. Low-performance implementation. */
1973 static void make_mem_defined_if_addressable ( Addr a
, SizeT len
)
1977 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a
, (ULong
)len
);
1978 for (i
= 0; i
< len
; i
++) {
1979 vabits2
= get_vabits2( a
+i
);
1980 if (LIKELY(VA_BITS2_NOACCESS
!= vabits2
)) {
1981 set_vabits2(a
+i
, VA_BITS2_DEFINED
);
1982 if (UNLIKELY(MC_(clo_mc_level
) >= 3)) {
1983 MC_(helperc_b_store1
)( a
+i
, 0 ); /* clear the origin tag */
1989 /* Similarly (needed for mprotect handling ..) */
1990 static void make_mem_defined_if_noaccess ( Addr a
, SizeT len
)
1994 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a
, (ULong
)len
);
1995 for (i
= 0; i
< len
; i
++) {
1996 vabits2
= get_vabits2( a
+i
);
1997 if (LIKELY(VA_BITS2_NOACCESS
== vabits2
)) {
1998 set_vabits2(a
+i
, VA_BITS2_DEFINED
);
1999 if (UNLIKELY(MC_(clo_mc_level
) >= 3)) {
2000 MC_(helperc_b_store1
)( a
+i
, 0 ); /* clear the origin tag */
2006 /* --- Block-copy permissions (needed for implementing realloc() and
2009 void MC_(copy_address_range_state
) ( Addr src
, Addr dst
, SizeT len
)
2012 UChar vabits2
, vabits8
;
2013 Bool aligned
, nooverlap
;
2015 DEBUG("MC_(copy_address_range_state)\n");
2016 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE
);
2018 if (len
== 0 || src
== dst
)
2021 aligned
= VG_IS_4_ALIGNED(src
) && VG_IS_4_ALIGNED(dst
);
2022 nooverlap
= src
+len
<= dst
|| dst
+len
<= src
;
2024 if (nooverlap
&& aligned
) {
2026 /* Vectorised fast case, when no overlap and suitably aligned */
2030 vabits8
= get_vabits8_for_aligned_word32( src
+i
);
2031 set_vabits8_for_aligned_word32( dst
+i
, vabits8
);
2032 if (LIKELY(VA_BITS8_DEFINED
== vabits8
2033 || VA_BITS8_UNDEFINED
== vabits8
2034 || VA_BITS8_NOACCESS
== vabits8
)) {
2037 /* have to copy secondary map info */
2038 if (VA_BITS2_PARTDEFINED
== get_vabits2( src
+i
+0 ))
2039 set_sec_vbits8( dst
+i
+0, get_sec_vbits8( src
+i
+0 ) );
2040 if (VA_BITS2_PARTDEFINED
== get_vabits2( src
+i
+1 ))
2041 set_sec_vbits8( dst
+i
+1, get_sec_vbits8( src
+i
+1 ) );
2042 if (VA_BITS2_PARTDEFINED
== get_vabits2( src
+i
+2 ))
2043 set_sec_vbits8( dst
+i
+2, get_sec_vbits8( src
+i
+2 ) );
2044 if (VA_BITS2_PARTDEFINED
== get_vabits2( src
+i
+3 ))
2045 set_sec_vbits8( dst
+i
+3, get_sec_vbits8( src
+i
+3 ) );
2052 vabits2
= get_vabits2( src
+i
);
2053 set_vabits2( dst
+i
, vabits2
);
2054 if (VA_BITS2_PARTDEFINED
== vabits2
) {
2055 set_sec_vbits8( dst
+i
, get_sec_vbits8( src
+i
) );
2063 /* We have to do things the slow way */
2065 for (i
= 0, j
= len
-1; i
< len
; i
++, j
--) {
2066 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1
);
2067 vabits2
= get_vabits2( src
+j
);
2068 set_vabits2( dst
+j
, vabits2
);
2069 if (VA_BITS2_PARTDEFINED
== vabits2
) {
2070 set_sec_vbits8( dst
+j
, get_sec_vbits8( src
+j
) );
2076 for (i
= 0; i
< len
; i
++) {
2077 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2
);
2078 vabits2
= get_vabits2( src
+i
);
2079 set_vabits2( dst
+i
, vabits2
);
2080 if (VA_BITS2_PARTDEFINED
== vabits2
) {
2081 set_sec_vbits8( dst
+i
, get_sec_vbits8( src
+i
) );
2090 /*------------------------------------------------------------*/
2091 /*--- Origin tracking stuff - cache basics ---*/
2092 /*------------------------------------------------------------*/
2094 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
2095 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2097 Note that this implementation draws inspiration from the "origin
2098 tracking by value piggybacking" scheme described in "Tracking Bad
2099 Apples: Reporting the Origin of Null and Undefined Value Errors"
2100 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
2101 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
2102 implemented completely differently.
2104 Origin tags and ECUs -- about the shadow values
2105 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2107 This implementation tracks the defining point of all uninitialised
2108 values using so called "origin tags", which are 32-bit integers,
2109 rather than using the values themselves to encode the origins. The
   latter, so-called "value piggybacking", is what the OOPSLA07 paper
   describes.
2113 Origin tags, as tracked by the machinery below, are 32-bit unsigned
2114 ints (UInts), regardless of the machine's word size. Each tag
2115 comprises an upper 30-bit ECU field and a lower 2-bit
2116 'kind' field. The ECU field is a number given out by m_execontext
2117 and has a 1-1 mapping with ExeContext*s. An ECU can be used
2118 directly as an origin tag (otag), but in fact we want to put
   additional information in the 'kind' field to indicate roughly where the
2120 tag came from. This helps print more understandable error messages
2121 for the user -- it has no other purpose. In summary:
2123 * Both ECUs and origin tags are represented as 32-bit words
2125 * m_execontext and the core-tool interface deal purely in ECUs.
2126 They have no knowledge of origin tags - that is a purely
2127 Memcheck-internal matter.
2129 * all valid ECUs have the lowest 2 bits zero and at least
2130 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2132 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2133 constants defined in mc_include.h.
2135 * to convert an otag back to an ECU, AND it with ~3
2137 One important fact is that no valid otag is zero. A zero otag is
2138 used by the implementation to indicate "no origin", which could
2139 mean that either the value is defined, or it is undefined but the
2140 implementation somehow managed to lose the origin.
2142 The ECU used for memory created by malloc etc is derived from the
2143 stack trace at the time the malloc etc happens. This means the
2144 mechanism can show the exact allocation point for heap-created
2145 uninitialised values.
2147 In contrast, it is simply too expensive to create a complete
2148 backtrace for each stack allocation. Therefore we merely use a
2149 depth-1 backtrace for stack allocations, which can be done once at
2150 translation time, rather than N times at run time. The result of
2151 this is that, for stack created uninitialised values, Memcheck can
2152 only show the allocating function, and not what called it.
2153 Furthermore, compilers tend to move the stack pointer just once at
2154 the start of the function, to allocate all locals, and so in fact
2155 the stack origin almost always simply points to the opening brace
2156 of the function. Net result is, for stack origins, the mechanism
2157 can tell you in which function the undefined value was created, but
   that's all.  Users will need to carefully check all locals in the
   relevant function.
2161 Shadowing registers and memory
2162 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2164 Memory is shadowed using a two level cache structure (ocacheL1 and
2165 ocacheL2). Memory references are first directed to ocacheL1. This
2166 is a traditional 2-way set associative cache with 32-byte lines and
2167 approximate LRU replacement within each set.
2169 A naive implementation would require storing one 32 bit otag for
2170 each byte of memory covered, a 4:1 space overhead. Instead, there
2171 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2172 that shows which of the 4 bytes have that shadow value and which
2173 have a shadow value of zero (indicating no origin). Hence a lot of
2174 space is saved, but the cost is that only one different origin per
2175 4 bytes of address space can be represented. This is a source of
   imprecision, but how much of a problem it really is remains to be
   seen.
2179 A cache line that contains all zeroes ("no origins") contains no
2180 useful information, and can be ejected from the L1 cache "for
2181 free", in the sense that a read miss on the L1 causes a line of
2182 zeroes to be installed. However, ejecting a line containing
2183 nonzeroes risks losing origin information permanently. In order to
2184 prevent such lossage, ejected nonzero lines are placed in a
2185 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2186 lines. This can grow arbitrarily large, and so should ensure that
2187 Memcheck runs out of memory in preference to losing useful origin
2188 info due to cache size limitations.
2190 Shadowing registers is a bit tricky, because the shadow values are
2191 32 bits, regardless of the size of the register. That gives a
2192 problem for registers smaller than 32 bits. The solution is to
2193 find spaces in the guest state that are unused, and use those to
2194 shadow guest state fragments smaller than 32 bits. For example, on
2195 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
2196 shadow are allocated for the register's otag, then there are still
2197 12 bytes left over which could be used to shadow 3 other values.
2199 This implies there is some non-obvious mapping from guest state
2200 (start,length) pairs to the relevant shadow offset (for the origin
2201 tags). And it is unfortunately guest-architecture specific. The
   mapping is contained in mc_machine.c, which is quite lengthy but
   straightforward.
2205 Instrumenting the IR
2206 ~~~~~~~~~~~~~~~~~~~~
2208 Instrumentation is largely straightforward, and done by the
2209 functions schemeE and schemeS in mc_translate.c. These generate
2210 code for handling the origin tags of expressions (E) and statements
2211 (S) respectively. The rather strange names are a reference to the
2212 "compilation schemes" shown in Simon Peyton Jones' book "The
   Implementation of Functional Programming Languages" (Prentice Hall,
   1987; see
   http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2217 schemeS merely arranges to move shadow values around the guest
2218 state to track the incoming IR. schemeE is largely trivial too.
2219 The only significant point is how to compute the otag corresponding
2220 to binary (or ternary, quaternary, etc) operator applications. The
2221 rule is simple: just take whichever value is larger (32-bit
2222 unsigned max). Constants get the special value zero. Hence this
2223 rule always propagates a nonzero (known) otag in preference to a
2224 zero (unknown, or more likely, value-is-defined) tag, as we want.
2225 If two different undefined values are inputs to a binary operator
2226 application, then which is propagated is arbitrary, but that
2227 doesn't matter, since the program is erroneous in using either of
   the values, and so there's no point in attempting to propagate
   both.
2231 Since constants are abstracted to (otag) zero, much of the
2232 instrumentation code can be folded out without difficulty by the
2233 generic post-instrumentation IR cleanup pass, using these rules:
   Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
   are constants is evaluated at JIT time; the resulting dead code is
   then removed.  In practice this causes surprisingly few Max32Us to
2237 survive through to backend code generation.
2239 Integration with the V-bits machinery
2240 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2242 This is again largely straightforward. Mostly the otag and V bits
2243 stuff are independent. The only point of interaction is when the V
2244 bits instrumenter creates a call to a helper function to report an
2245 uninitialised value error -- in that case it must first use schemeE
2246 to get hold of the origin tag expression for the value, and pass
2247 that to the helper too.
2249 There is the usual stuff to do with setting address range
2250 permissions. When memory is painted undefined, we must also know
2251 the origin tag to paint with, which involves some tedious plumbing,
2252 particularly to do with the fast case stack handlers. When memory
   is painted defined or noaccess then the origin tags must be forced
   to zero.
2256 One of the goals of the implementation was to ensure that the
2257 non-origin tracking mode isn't slowed down at all. To do this,
2258 various functions to do with memory permissions setting (again,
   mostly pertaining to the stack) are duplicated for the with- and
   without-otag case.
2262 Dealing with stack redzones, and the NIA cache
2263 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2265 This is one of the few non-obvious parts of the implementation.
2267 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2268 reserved area below the stack pointer, that can be used as scratch
2269 space by compiler generated code for functions. In the Memcheck
2270 sources this is referred to as the "stack redzone". The important
2271 thing here is that such redzones are considered volatile across
2272 function calls and returns. So Memcheck takes care to mark them as
2273 undefined for each call and return, on the afflicted platforms.
2274 Past experience shows this is essential in order to get reliable
2275 messages about uninitialised values that come from the stack.
2277 So the question is, when we paint a redzone undefined, what origin
2278 tag should we use for it? Consider a function f() calling g(). If
2279 we paint the redzone using an otag derived from the ExeContext of
2280 the CALL/BL instruction in f, then any errors in g causing it to
2281 use uninitialised values that happen to lie in the redzone, will be
2282 reported as having their origin in f. Which is highly confusing.
2284 The same applies for returns: if, on a return, we paint the redzone
   using an origin tag derived from the ExeContext of the RET/BLR
2286 instruction in g, then any later errors in f causing it to use
2287 uninitialised values in the redzone, will be reported as having
2288 their origin in g. Which is just as confusing.
2290 To do it right, in both cases we need to use an origin tag which
2291 pertains to the instruction which dynamically follows the CALL/BL
2292 or RET/BLR. In short, one derived from the NIA - the "next
2293 instruction address".
2295 To make this work, Memcheck's redzone-painting helper,
2296 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2297 NIA. It converts the NIA to a 1-element ExeContext, and uses that
2298 ExeContext's ECU as the basis for the otag used to paint the
2299 redzone. The expensive part of this is converting an NIA into an
2300 ECU, since this happens once for every call and every return. So
2301 we use a simple 511-line, 2-way set associative cache
   (nia_to_ecu_cache) to cache the mappings, and that knocks most of
   the cost out.
2305 Further background comments
2306 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2308 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
2309 > it really just the address of the relevant ExeContext?
2311 Well, it's not the address, but a value which has a 1-1 mapping
2312 with ExeContexts, and is guaranteed not to be zero, since zero
2313 denotes (to memcheck) "unknown origin or defined value". So these
2314 UInts are just numbers starting at 4 and incrementing by 4; each
2315 ExeContext is given a number when it is created. (*** NOTE this
2316 confuses otags and ECUs; see comments above ***).
2318 Making these otags 32-bit regardless of the machine's word size
2319 makes the 64-bit implementation easier (next para). And it doesn't
2320 really limit us in any way, since for the tags to overflow would
2321 require that the program somehow caused 2^30-1 different
2322 ExeContexts to be created, in which case it is probably in deep
2323 trouble. Not to mention V will have soaked up many tens of
2324 gigabytes of memory merely to store them all.
2326 So having 64-bit origins doesn't really buy you anything, and has
2327 the following downsides:
   Suppose that instead, an otag is a UWord.  This would mean that, on
   a 64-bit target,
2332 1. It becomes hard to shadow any element of guest state which is
2333 smaller than 8 bytes. To do so means you'd need to find some
2334 8-byte-sized hole in the guest state which you don't want to
2335 shadow, and use that instead to hold the otag. On ppc64, the
2336 condition code register(s) are split into 20 UChar sized pieces,
2337 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
      and so that would entail finding 160 bytes somewhere else in the
      guest state.
2341 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2342 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2343 same) and so I had to look for 4 untracked otag-sized areas in
2344 the guest state to make that possible.
2346 The same problem exists of course when origin tags are only 32
2347 bits, but it's less extreme.
2349 2. (More compelling) it doubles the size of the origin shadow
2350 memory. Given that the shadow memory is organised as a fixed
2351 size cache, and that accuracy of tracking is limited by origins
2352 falling out the cache due to space conflicts, this isn't good.
2354 > Another question: is the origin tracking perfect, or are there
2355 > cases where it fails to determine an origin?
   It is imperfect for at least the following reasons, and
   probably more:
2360 * Insufficient capacity in the origin cache. When a line is
2361 evicted from the cache it is gone forever, and so subsequent
2362 queries for the line produce zero, indicating no origin
2363 information. Interestingly, a line containing all zeroes can be
2364 evicted "free" from the cache, since it contains no useful
2365 information, so there is scope perhaps for some cleverer cache
2366 management schemes. (*** NOTE, with the introduction of the
     second level origin tag cache, ocacheL2, this is no longer a
     problem. ***)
2370 * The origin cache only stores one otag per 32-bits of address
2371 space, plus 4 bits indicating which of the 4 bytes has that tag
2372 and which are considered defined. The result is that if two
2373 undefined bytes in the same word are stored in memory, the first
     stored byte's origin will be lost and replaced by the origin for
     the last stored byte.
2377 * Nonzero origin tags for defined values. Consider a binary
2378 operator application op(x,y). Suppose y is undefined (and so has
2379 a valid nonzero origin tag), and x is defined, but erroneously
2380 has a nonzero origin tag (defined values should have tag zero).
2381 If the erroneous tag has a numeric value greater than y's tag,
     then the rule for propagating origin tags through binary
2383 operations, which is simply to take the unsigned max of the two
2384 tags, will erroneously propagate x's tag rather than y's.
2386 * Some obscure uses of x86/amd64 byte registers can cause lossage
2387 or confusion of origins. %AH .. %DH are treated as different
2388 from, and unrelated to, their parent registers, %EAX .. %EDX.
2389 So some weird sequences like
2391 movb undefined-value, %AH
2392 movb defined-value, %AL
2393 .. use %AX or %EAX ..
2395 will cause the origin attributed to %AH to be ignored, since %AL,
2396 %AX, %EAX are treated as the same register, and %AH as a
2397 completely separate one.
   But having said all that, it actually seems to work fairly well in
   practice.
*/
2403 static UWord stats_ocacheL1_find
= 0;
2404 static UWord stats_ocacheL1_found_at_1
= 0;
2405 static UWord stats_ocacheL1_found_at_N
= 0;
2406 static UWord stats_ocacheL1_misses
= 0;
2407 static UWord stats_ocacheL1_lossage
= 0;
2408 static UWord stats_ocacheL1_movefwds
= 0;
2410 static UWord stats__ocacheL2_finds
= 0;
2411 static UWord stats__ocacheL2_adds
= 0;
2412 static UWord stats__ocacheL2_dels
= 0;
2413 static UWord stats__ocacheL2_misses
= 0;
2414 static UWord stats__ocacheL2_n_nodes_max
= 0;
2416 /* Cache of 32-bit values, one every 32 bits of address space */
2418 #define OC_BITS_PER_LINE 5
2419 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2421 static INLINE UWord
oc_line_offset ( Addr a
) {
2422 return (a
>> 2) & (OC_W32S_PER_LINE
- 1);
2424 static INLINE Bool
is_valid_oc_tag ( Addr tag
) {
2425 return 0 == (tag
& ((1 << OC_BITS_PER_LINE
) - 1));
2428 #define OC_LINES_PER_SET 2
2430 #define OC_N_SET_BITS 20
2431 #define OC_N_SETS (1 << OC_N_SET_BITS)
2433 /* These settings give:
2434 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2435 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
2438 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2441 /* Originally (pre Dec 2021) it was the case that this code had a
2442 parameterizable cache line size, set by changing OC_BITS_PER_LINE.
2443 However, as a result of the speedup fixes necessitated by bug 446103, that
2444 is no longer really the case, and much of the L1 and L2 cache code has been
2445 tuned specifically for the case OC_BITS_PER_LINE == 5 (that is, the line
2446 size is 32 bytes). Changing that would require a bunch of re-tuning
2447 effort. So let's set it in stone for now. */
2448 STATIC_ASSERT(OC_BITS_PER_LINE
== 5);
2449 STATIC_ASSERT(OC_LINES_PER_SET
== 2);
2451 /* Fundamentally we want an OCacheLine structure (see below) as follows:
2454 UInt w32 [OC_W32S_PER_LINE];
2455 UChar descr[OC_W32S_PER_LINE];
2457 However, in various places, we want to set the w32[] and descr[] arrays to
2458 zero, or check if they are zero. This can be a very hot path (per bug
2459 446103). So, instead, we have a union which is either those two arrays
2460 (OCacheLine_Main) or simply an array of ULongs (OCacheLine_W64s). For the
2461 set-zero/test-zero operations, the OCacheLine_W64s are used.
2464 // To ensure that OCacheLine.descr[] will fit in an integral number of ULongs.
2465 STATIC_ASSERT(0 == (OC_W32S_PER_LINE
% 8));
2467 #define OC_W64S_PER_MAIN /* "MAIN" meaning "struct OCacheLine_Main" */ \
2468 (OC_W32S_PER_LINE / 2 /* covers OCacheLine_Main.w32[] */ \
2469 + OC_W32S_PER_LINE / 8) /* covers OCacheLine_Main.descr[] */
2470 STATIC_ASSERT(OC_W64S_PER_MAIN
== 5);
2473 ULong OCacheLine_W64s
[OC_W64S_PER_MAIN
];
2477 UInt w32
[OC_W32S_PER_LINE
];
2478 UChar descr
[OC_W32S_PER_LINE
];
2482 STATIC_ASSERT(sizeof(OCacheLine_W64s
) == sizeof(OCacheLine_Main
));
2488 OCacheLine_W64s w64s
;
2489 OCacheLine_Main main
;
2494 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2495 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2496 and 'z' if all the represented tags are zero. */
2497 static inline UChar
classify_OCacheLine ( OCacheLine
* line
)
2500 if (line
->tag
== 1/*invalid*/)
2501 return 'e'; /* EMPTY */
2502 tl_assert(is_valid_oc_tag(line
->tag
));
2504 // BEGIN fast special-case of the test loop below. This will detect
2505 // zero-ness (case 'z') for a subset of cases that the loop below will,
2507 if (OC_W64S_PER_MAIN
== 5) {
2508 if (line
->u
.w64s
[0] == 0
2509 && line
->u
.w64s
[1] == 0 && line
->u
.w64s
[2] == 0
2510 && line
->u
.w64s
[3] == 0 && line
->u
.w64s
[4] == 0) {
2514 tl_assert2(0, "unsupported line size (classify_OCacheLine)");
2516 // END fast special-case of the test loop below.
2518 for (i
= 0; i
< OC_W32S_PER_LINE
; i
++) {
2519 tl_assert(0 == ((~0xF) & line
->u
.main
.descr
[i
]));
2520 if (line
->u
.main
.w32
[i
] > 0 && line
->u
.main
.descr
[i
] > 0)
2521 return 'n'; /* NONZERO - contains useful info */
2523 return 'z'; /* ZERO - no useful info */
2528 OCacheLine line
[OC_LINES_PER_SET
];
2534 OCacheSet set
[OC_N_SETS
];
2538 static OCache
* ocacheL1
= NULL
;
2539 static UWord ocacheL1_event_ctr
= 0;
2541 static void init_ocacheL2 ( void ); /* fwds */
2542 static void init_OCache ( void )
2545 tl_assert(MC_(clo_mc_level
) >= 3);
2546 tl_assert(ocacheL1
== NULL
);
2547 SysRes sres
= VG_(am_shadow_alloc
)(sizeof(OCache
));
2548 if (sr_isError(sres
)) {
2549 VG_(out_of_memory_NORETURN
)( "memcheck:allocating ocacheL1",
2550 sizeof(OCache
), sr_Err(sres
) );
2552 ocacheL1
= (void *)(Addr
)sr_Res(sres
);
2553 tl_assert(ocacheL1
!= NULL
);
2554 for (set
= 0; set
< OC_N_SETS
; set
++) {
2555 for (line
= 0; line
< OC_LINES_PER_SET
; line
++) {
2556 ocacheL1
->set
[set
].line
[line
].tag
= 1/*invalid*/;
2562 static inline void moveLineForwards ( OCacheSet
* set
, UWord lineno
)
2565 stats_ocacheL1_movefwds
++;
2566 tl_assert(lineno
> 0 && lineno
< OC_LINES_PER_SET
);
2567 tmp
= set
->line
[lineno
-1];
2568 set
->line
[lineno
-1] = set
->line
[lineno
];
2569 set
->line
[lineno
] = tmp
;
2572 static inline void zeroise_OCacheLine ( OCacheLine
* line
, Addr tag
) {
2574 if (OC_W32S_PER_LINE
== 8) {
2575 // BEGIN fast special-case of the loop below
2576 tl_assert(OC_W64S_PER_MAIN
== 5);
2577 line
->u
.w64s
[0] = 0;
2578 line
->u
.w64s
[1] = 0;
2579 line
->u
.w64s
[2] = 0;
2580 line
->u
.w64s
[3] = 0;
2581 line
->u
.w64s
[4] = 0;
2582 // END fast special-case of the loop below
2584 tl_assert2(0, "unsupported line size (zeroise_OCacheLine)");
2585 for (i
= 0; i
< OC_W32S_PER_LINE
; i
++) {
2586 line
->u
.main
.w32
[i
] = 0; /* NO ORIGIN */
2587 line
->u
.main
.descr
[i
] = 0; /* REALLY REALLY NO ORIGIN! */
2593 //////////////////////////////////////////////////////////////
2594 //// OCache backing store
2596 // The backing store for ocacheL1 is, conceptually, an AVL tree of lines that
2597 // got ejected from the L1 (a "victim cache"), and which actually contain
2598 // useful info -- that is, for which classify_OCacheLine would return 'n' and
2599 // no other value. However, the tree can grow large, and searching/updating
2600 // it can be hot paths. Hence we "take out" 12 significant bits of the key by
2601 // having 4096 trees, and select one using HASH_OCACHE_TAG.
2603 // What that hash function returns isn't important so long as it is a pure
2604 // function of the tag values, and is < 4096. However, it is critical for
2605 // performance of long SARPs. Hence the extra shift of 11 bits. This means
2606 // each tree conceptually is assigned to contiguous sequences of 2048 lines in
2607 // the "line address space", giving some locality of reference when scanning
2608 // linearly through address space, as is done by a SARP. Changing that 11 to
2609 // 0 gives terrible performance on long SARPs, presumably because each new
2610 // line is in a different tree, hence we wind up thrashing the (CPU's) caches.
2612 // On 32-bit targets, we have to be a bit careful not to shift out so many
2613 // bits that not all 2^12 trees get used. That leads to the constraint
2614 // (OC_BITS_PER_LINE + 11 + 12) < 32. Note that the 11 is the only thing we
2615 // can change here. In this case we have OC_BITS_PER_LINE == 5, hence the
2616 // inequality is (28 < 32) and so we're good.
2618 // The value 11 was determined empirically from various Firefox runs. 10 or
2619 // 12 also work pretty well.
2621 static OSet
* ocachesL2
[4096];
2623 STATIC_ASSERT((OC_BITS_PER_LINE
+ 11 + 12) < 32);
2624 static inline UInt
HASH_OCACHE_TAG ( Addr tag
) {
2625 return (UInt
)((tag
>> (OC_BITS_PER_LINE
+ 11)) & 0xFFF);
2628 static void* ocacheL2_malloc ( const HChar
* cc
, SizeT szB
) {
2629 return VG_(malloc
)(cc
, szB
);
2631 static void ocacheL2_free ( void* v
) {
2635 /* Stats: # nodes currently in tree */
2636 static UWord stats__ocacheL2_n_nodes
= 0;
2638 static void init_ocacheL2 ( void )
2640 tl_assert(sizeof(Word
) == sizeof(Addr
)); /* since OCacheLine.tag :: Addr */
2641 tl_assert(0 == offsetof(OCacheLine
,tag
));
2642 for (UInt i
= 0; i
< 4096; i
++) {
2643 tl_assert(!ocachesL2
[i
]);
2645 = VG_(OSetGen_Create
)( offsetof(OCacheLine
,tag
),
2646 NULL
, /* fast cmp */
2647 ocacheL2_malloc
, "mc.ioL2", ocacheL2_free
);
2649 stats__ocacheL2_n_nodes
= 0;
2652 /* Find line with the given tag in the tree, or NULL if not found. */
2653 static inline OCacheLine
* ocacheL2_find_tag ( Addr tag
)
2656 tl_assert(is_valid_oc_tag(tag
));
2657 stats__ocacheL2_finds
++;
2658 OSet
* oset
= ocachesL2
[HASH_OCACHE_TAG(tag
)];
2659 line
= VG_(OSetGen_Lookup
)( oset
, &tag
);
2663 /* Delete the line with the given tag from the tree, if it is present, and
2664 free up the associated memory. */
2665 static void ocacheL2_del_tag ( Addr tag
)
2668 tl_assert(is_valid_oc_tag(tag
));
2669 stats__ocacheL2_dels
++;
2670 OSet
* oset
= ocachesL2
[HASH_OCACHE_TAG(tag
)];
2671 line
= VG_(OSetGen_Remove
)( oset
, &tag
);
2673 VG_(OSetGen_FreeNode
)(oset
, line
);
2674 tl_assert(stats__ocacheL2_n_nodes
> 0);
2675 stats__ocacheL2_n_nodes
--;
2679 /* Add a copy of the given line to the tree. It must not already be
2681 static void ocacheL2_add_line ( OCacheLine
* line
)
2684 tl_assert(is_valid_oc_tag(line
->tag
));
2685 OSet
* oset
= ocachesL2
[HASH_OCACHE_TAG(line
->tag
)];
2686 copy
= VG_(OSetGen_AllocNode
)( oset
, sizeof(OCacheLine
) );
2688 stats__ocacheL2_adds
++;
2689 VG_(OSetGen_Insert
)( oset
, copy
);
2690 stats__ocacheL2_n_nodes
++;
2691 if (stats__ocacheL2_n_nodes
> stats__ocacheL2_n_nodes_max
)
2692 stats__ocacheL2_n_nodes_max
= stats__ocacheL2_n_nodes
;
2696 //////////////////////////////////////////////////////////////
2698 __attribute__((noinline
))
2699 static OCacheLine
* find_OCacheLine_SLOW ( Addr a
)
2701 OCacheLine
*victim
, *inL2
;
2704 UWord setno
= (a
>> OC_BITS_PER_LINE
) & (OC_N_SETS
- 1);
2705 UWord tagmask
= ~((1 << OC_BITS_PER_LINE
) - 1);
2706 UWord tag
= a
& tagmask
;
2707 tl_assert(setno
< OC_N_SETS
);
2709 /* we already tried line == 0; skip therefore. */
2710 for (line
= 1; line
< OC_LINES_PER_SET
; line
++) {
2711 if (ocacheL1
->set
[setno
].line
[line
].tag
== tag
) {
2713 stats_ocacheL1_found_at_1
++;
2715 stats_ocacheL1_found_at_N
++;
2717 if (UNLIKELY(0 == (ocacheL1_event_ctr
++
2718 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS
)-1)))) {
2719 moveLineForwards( &ocacheL1
->set
[setno
], line
);
2722 return &ocacheL1
->set
[setno
].line
[line
];
2726 /* A miss. Use the last slot. Implicitly this means we're
2727 ejecting the line in the last slot. */
2728 stats_ocacheL1_misses
++;
2729 tl_assert(line
== OC_LINES_PER_SET
);
2731 tl_assert(line
> 0);
2733 /* First, move the to-be-ejected line to the L2 cache. */
2734 victim
= &ocacheL1
->set
[setno
].line
[line
];
2735 c
= classify_OCacheLine(victim
);
2738 /* the line is empty (has invalid tag); ignore it. */
2741 /* line contains zeroes. We must ensure the backing store is
2742 updated accordingly, either by copying the line there
2743 verbatim, or by ensuring it isn't present there. We
2744 choose the latter on the basis that it reduces the size of
2745 the backing store. */
2746 ocacheL2_del_tag( victim
->tag
);
2749 /* line contains at least one real, useful origin. Copy it
2750 to the backing store. */
2751 stats_ocacheL1_lossage
++;
2752 inL2
= ocacheL2_find_tag( victim
->tag
);
2756 ocacheL2_add_line( victim
);
2763 /* Now we must reload the L1 cache from the backing tree, if
2765 tl_assert(tag
!= victim
->tag
); /* stay sane */
2766 inL2
= ocacheL2_find_tag( tag
);
2768 /* We're in luck. It's in the L2. */
2769 ocacheL1
->set
[setno
].line
[line
] = *inL2
;
2771 /* Missed at both levels of the cache hierarchy. We have to
2772 declare it as full of zeroes (unknown origins). */
2773 stats__ocacheL2_misses
++;
2774 zeroise_OCacheLine( &ocacheL1
->set
[setno
].line
[line
], tag
);
2777 /* Move it one forwards */
2778 moveLineForwards( &ocacheL1
->set
[setno
], line
);
2781 return &ocacheL1
->set
[setno
].line
[line
];
2784 static INLINE OCacheLine
* find_OCacheLine ( Addr a
)
2786 UWord setno
= (a
>> OC_BITS_PER_LINE
) & (OC_N_SETS
- 1);
2787 UWord tagmask
= ~((1 << OC_BITS_PER_LINE
) - 1);
2788 UWord tag
= a
& tagmask
;
2790 stats_ocacheL1_find
++;
2792 if (OC_ENABLE_ASSERTIONS
) {
2793 tl_assert(setno
>= 0 && setno
< OC_N_SETS
);
2794 tl_assert(0 == (tag
& (4 * OC_W32S_PER_LINE
- 1)));
2797 if (LIKELY(ocacheL1
->set
[setno
].line
[0].tag
== tag
)) {
2798 return &ocacheL1
->set
[setno
].line
[0];
2801 return find_OCacheLine_SLOW( a
);
2804 static INLINE
void set_aligned_word64_Origin_to_undef ( Addr a
, UInt otag
)
2806 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2807 //// Set the origins for a+0 .. a+7
2809 UWord lineoff
= oc_line_offset(a
);
2810 if (OC_ENABLE_ASSERTIONS
) {
2811 tl_assert(lineoff
>= 0
2812 && lineoff
< OC_W32S_PER_LINE
-1/*'cos 8-aligned*/);
2814 line
= find_OCacheLine( a
);
2815 line
->u
.main
.descr
[lineoff
+0] = 0xF;
2816 line
->u
.main
.descr
[lineoff
+1] = 0xF;
2817 line
->u
.main
.w32
[lineoff
+0] = otag
;
2818 line
->u
.main
.w32
[lineoff
+1] = otag
;
2820 //// END inlined, specialised version of MC_(helperc_b_store8)
2824 /*------------------------------------------------------------*/
2825 /*--- Aligned fast case permission setters, ---*/
2826 /*--- for dealing with stacks ---*/
2827 /*------------------------------------------------------------*/
2829 /*--------------------- 32-bit ---------------------*/
2831 /* Nb: by "aligned" here we mean 4-byte aligned */
2833 static INLINE
void make_aligned_word32_undefined ( Addr a
)
2835 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED
);
2837 #ifndef PERF_FAST_STACK2
2838 make_mem_undefined(a
, 4);
2844 if (UNLIKELY(a
> MAX_PRIMARY_ADDRESS
)) {
2845 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW
);
2846 make_mem_undefined(a
, 4);
2850 sm
= get_secmap_for_writing_low(a
);
2852 sm
->vabits8
[sm_off
] = VA_BITS8_UNDEFINED
;
2858 void make_aligned_word32_undefined_w_otag ( Addr a
, UInt otag
)
2860 make_aligned_word32_undefined(a
);
2861 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2862 //// Set the origins for a+0 .. a+3
2864 UWord lineoff
= oc_line_offset(a
);
2865 if (OC_ENABLE_ASSERTIONS
) {
2866 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
2868 line
= find_OCacheLine( a
);
2869 line
->u
.main
.descr
[lineoff
] = 0xF;
2870 line
->u
.main
.w32
[lineoff
] = otag
;
2872 //// END inlined, specialised version of MC_(helperc_b_store4)
2876 void make_aligned_word32_noaccess ( Addr a
)
2878 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS
);
2880 #ifndef PERF_FAST_STACK2
2881 MC_(make_mem_noaccess
)(a
, 4);
2887 if (UNLIKELY(a
> MAX_PRIMARY_ADDRESS
)) {
2888 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW
);
2889 MC_(make_mem_noaccess
)(a
, 4);
2893 sm
= get_secmap_for_writing_low(a
);
2895 sm
->vabits8
[sm_off
] = VA_BITS8_NOACCESS
;
2897 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2898 //// Set the origins for a+0 .. a+3.
2899 if (UNLIKELY( MC_(clo_mc_level
) == 3 )) {
2901 UWord lineoff
= oc_line_offset(a
);
2902 if (OC_ENABLE_ASSERTIONS
) {
2903 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
2905 line
= find_OCacheLine( a
);
2906 line
->u
.main
.descr
[lineoff
] = 0;
2908 //// END inlined, specialised version of MC_(helperc_b_store4)
2913 /*--------------------- 64-bit ---------------------*/
2915 /* Nb: by "aligned" here we mean 8-byte aligned */
2917 static INLINE
void make_aligned_word64_undefined ( Addr a
)
2919 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED
);
2921 #ifndef PERF_FAST_STACK2
2922 make_mem_undefined(a
, 8);
2928 if (UNLIKELY(a
> MAX_PRIMARY_ADDRESS
)) {
2929 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW
);
2930 make_mem_undefined(a
, 8);
2934 sm
= get_secmap_for_writing_low(a
);
2935 sm_off16
= SM_OFF_16(a
);
2936 sm
->vabits16
[sm_off16
] = VA_BITS16_UNDEFINED
;
2942 void make_aligned_word64_undefined_w_otag ( Addr a
, UInt otag
)
2944 make_aligned_word64_undefined(a
);
2945 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2946 //// Set the origins for a+0 .. a+7
2948 UWord lineoff
= oc_line_offset(a
);
2949 tl_assert(lineoff
>= 0
2950 && lineoff
< OC_W32S_PER_LINE
-1/*'cos 8-aligned*/);
2951 line
= find_OCacheLine( a
);
2952 line
->u
.main
.descr
[lineoff
+0] = 0xF;
2953 line
->u
.main
.descr
[lineoff
+1] = 0xF;
2954 line
->u
.main
.w32
[lineoff
+0] = otag
;
2955 line
->u
.main
.w32
[lineoff
+1] = otag
;
2957 //// END inlined, specialised version of MC_(helperc_b_store8)
2961 void make_aligned_word64_noaccess ( Addr a
)
2963 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS
);
2965 #ifndef PERF_FAST_STACK2
2966 MC_(make_mem_noaccess
)(a
, 8);
2972 if (UNLIKELY(a
> MAX_PRIMARY_ADDRESS
)) {
2973 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW
);
2974 MC_(make_mem_noaccess
)(a
, 8);
2978 sm
= get_secmap_for_writing_low(a
);
2979 sm_off16
= SM_OFF_16(a
);
2980 sm
->vabits16
[sm_off16
] = VA_BITS16_NOACCESS
;
2982 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2983 //// Clear the origins for a+0 .. a+7.
2984 if (UNLIKELY( MC_(clo_mc_level
) == 3 )) {
2986 UWord lineoff
= oc_line_offset(a
);
2987 tl_assert(lineoff
< OC_W32S_PER_LINE
-1/*'cos 8-aligned*/);
2988 line
= find_OCacheLine( a
);
2989 line
->u
.main
.descr
[lineoff
+0] = 0;
2990 line
->u
.main
.descr
[lineoff
+1] = 0;
2992 //// END inlined, specialised version of MC_(helperc_b_store8)
2998 /*------------------------------------------------------------*/
2999 /*--- Stack pointer adjustment ---*/
3000 /*------------------------------------------------------------*/
3002 #ifdef PERF_FAST_STACK
3005 # define MAYBE_USED __attribute__((unused))
3008 /*--------------- adjustment by 4 bytes ---------------*/
3011 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP
, UInt ecu
)
3013 UInt otag
= ecu
| MC_OKIND_STACK
;
3014 PROF_EVENT(MCPE_NEW_MEM_STACK_4
);
3015 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3016 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3018 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 4, otag
);
3023 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP
)
3025 PROF_EVENT(MCPE_NEW_MEM_STACK_4
);
3026 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3027 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3029 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 4 );
3034 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP
)
3036 PROF_EVENT(MCPE_DIE_MEM_STACK_4
);
3037 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3038 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
3040 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-4, 4 );
3044 /*--------------- adjustment by 8 bytes ---------------*/
3047 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP
, UInt ecu
)
3049 UInt otag
= ecu
| MC_OKIND_STACK
;
3050 PROF_EVENT(MCPE_NEW_MEM_STACK_8
);
3051 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3052 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3053 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3054 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3055 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+4, otag
);
3057 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 8, otag
);
3062 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP
)
3064 PROF_EVENT(MCPE_NEW_MEM_STACK_8
);
3065 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3066 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3067 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3068 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3069 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 );
3071 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 8 );
3076 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP
)
3078 PROF_EVENT(MCPE_DIE_MEM_STACK_8
);
3079 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3080 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-8 );
3081 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3082 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-8 );
3083 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
3085 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-8, 8 );
3089 /*--------------- adjustment by 12 bytes ---------------*/
3092 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP
, UInt ecu
)
3094 UInt otag
= ecu
| MC_OKIND_STACK
;
3095 PROF_EVENT(MCPE_NEW_MEM_STACK_12
);
3096 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3097 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3098 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8, otag
);
3099 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3100 /* from previous test we don't have 8-alignment at offset +0,
3101 hence must have 8 alignment at offsets +4/-4. Hence safe to
3102 do 4 at +0 and then 8 at +4/. */
3103 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3104 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+4, otag
);
3106 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 12, otag
);
3111 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP
)
3113 PROF_EVENT(MCPE_NEW_MEM_STACK_12
);
3114 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3115 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3116 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3117 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3118 /* from previous test we don't have 8-alignment at offset +0,
3119 hence must have 8 alignment at offsets +4/-4. Hence safe to
3120 do 4 at +0 and then 8 at +4/. */
3121 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3122 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 );
3124 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 12 );
3129 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP
)
3131 PROF_EVENT(MCPE_DIE_MEM_STACK_12
);
3132 /* Note the -12 in the test */
3133 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
-12 )) {
3134 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
3136 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-12 );
3137 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
3138 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3139 /* We have 4-alignment at +0, but we don't have 8-alignment at
3140 -12. So we must have 8-alignment at -8. Hence do 4 at -12
3141 and then 8 at -8. */
3142 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-12 );
3143 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-8 );
3145 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-12, 12 );
3149 /*--------------- adjustment by 16 bytes ---------------*/
3152 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP
, UInt ecu
)
3154 UInt otag
= ecu
| MC_OKIND_STACK
;
3155 PROF_EVENT(MCPE_NEW_MEM_STACK_16
);
3156 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3157 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3158 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3159 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8, otag
);
3160 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3161 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3162 Hence do 4 at +0, 8 at +4, 4 at +12. */
3163 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3164 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 , otag
);
3165 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+12, otag
);
3167 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 16, otag
);
3172 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP
)
3174 PROF_EVENT(MCPE_NEW_MEM_STACK_16
);
3175 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3176 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3177 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3178 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3179 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3180 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3181 Hence do 4 at +0, 8 at +4, 4 at +12. */
3182 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3183 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 );
3184 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+12 );
3186 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 16 );
3191 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP
)
3193 PROF_EVENT(MCPE_DIE_MEM_STACK_16
);
3194 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3195 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
3196 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3197 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-8 );
3198 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3199 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
3200 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3201 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-12 );
3202 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
3204 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-16, 16 );
3208 /*--------------- adjustment by 32 bytes ---------------*/
3211 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP
, UInt ecu
)
3213 UInt otag
= ecu
| MC_OKIND_STACK
;
3214 PROF_EVENT(MCPE_NEW_MEM_STACK_32
);
3215 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3216 /* Straightforward */
3217 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3218 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 , otag
);
3219 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3220 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3221 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3222 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3224 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3225 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 , otag
);
3226 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+12, otag
);
3227 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+20, otag
);
3228 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+28, otag
);
3230 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 32, otag
);
3235 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP
)
3237 PROF_EVENT(MCPE_NEW_MEM_STACK_32
);
3238 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3239 /* Straightforward */
3240 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3241 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3242 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3243 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3244 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3245 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3247 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3248 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 );
3249 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+12 );
3250 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+20 );
3251 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+28 );
3253 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 32 );
3258 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP
)
3260 PROF_EVENT(MCPE_DIE_MEM_STACK_32
);
3261 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3262 /* Straightforward */
3263 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3264 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-24 );
3265 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3266 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
- 8 );
3267 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3268 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
3270 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3271 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-28 );
3272 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-20 );
3273 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-12 );
3274 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
3276 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-32, 32 );
3280 /*--------------- adjustment by 112 bytes ---------------*/
3283 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP
, UInt ecu
)
3285 UInt otag
= ecu
| MC_OKIND_STACK
;
3286 PROF_EVENT(MCPE_NEW_MEM_STACK_112
);
3287 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3288 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3289 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 , otag
);
3290 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3291 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3292 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+32, otag
);
3293 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+40, otag
);
3294 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+48, otag
);
3295 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+56, otag
);
3296 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+64, otag
);
3297 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+72, otag
);
3298 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+80, otag
);
3299 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+88, otag
);
3300 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+96, otag
);
3301 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+104, otag
);
3303 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 112, otag
);
3308 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP
)
3310 PROF_EVENT(MCPE_NEW_MEM_STACK_112
);
3311 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3312 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3313 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3314 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3315 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3316 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+32 );
3317 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+40 );
3318 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+48 );
3319 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+56 );
3320 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+64 );
3321 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+72 );
3322 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+80 );
3323 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+88 );
3324 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+96 );
3325 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+104 );
3327 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 112 );
3332 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP
)
3334 PROF_EVENT(MCPE_DIE_MEM_STACK_112
);
3335 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3336 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-112);
3337 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-104);
3338 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-96 );
3339 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-88 );
3340 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-80 );
3341 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-72 );
3342 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-64 );
3343 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-56 );
3344 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-48 );
3345 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-40 );
3346 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3347 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-24 );
3348 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3349 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
- 8 );
3351 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-112, 112 );
3355 /*--------------- adjustment by 128 bytes ---------------*/
3358 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP
, UInt ecu
)
3360 UInt otag
= ecu
| MC_OKIND_STACK
;
3361 PROF_EVENT(MCPE_NEW_MEM_STACK_128
);
3362 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3363 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3364 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 , otag
);
3365 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3366 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3367 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+32, otag
);
3368 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+40, otag
);
3369 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+48, otag
);
3370 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+56, otag
);
3371 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+64, otag
);
3372 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+72, otag
);
3373 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+80, otag
);
3374 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+88, otag
);
3375 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+96, otag
);
3376 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+104, otag
);
3377 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+112, otag
);
3378 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+120, otag
);
3380 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 128, otag
);
3385 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP
)
3387 PROF_EVENT(MCPE_NEW_MEM_STACK_128
);
3388 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3389 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3390 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3391 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3392 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3393 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+32 );
3394 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+40 );
3395 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+48 );
3396 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+56 );
3397 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+64 );
3398 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+72 );
3399 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+80 );
3400 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+88 );
3401 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+96 );
3402 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+104 );
3403 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+112 );
3404 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+120 );
3406 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 128 );
3411 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP
)
3413 PROF_EVENT(MCPE_DIE_MEM_STACK_128
);
3414 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3415 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-128);
3416 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-120);
3417 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-112);
3418 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-104);
3419 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-96 );
3420 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-88 );
3421 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-80 );
3422 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-72 );
3423 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-64 );
3424 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-56 );
3425 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-48 );
3426 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-40 );
3427 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3428 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-24 );
3429 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3430 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
- 8 );
3432 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-128, 128 );
3436 /*--------------- adjustment by 144 bytes ---------------*/
3439 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP
, UInt ecu
)
3441 UInt otag
= ecu
| MC_OKIND_STACK
;
3442 PROF_EVENT(MCPE_NEW_MEM_STACK_144
);
3443 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3444 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3445 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8, otag
);
3446 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3447 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3448 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+32, otag
);
3449 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+40, otag
);
3450 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+48, otag
);
3451 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+56, otag
);
3452 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+64, otag
);
3453 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+72, otag
);
3454 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+80, otag
);
3455 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+88, otag
);
3456 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+96, otag
);
3457 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+104, otag
);
3458 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+112, otag
);
3459 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+120, otag
);
3460 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+128, otag
);
3461 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+136, otag
);
3463 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 144, otag
);
3468 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP
)
3470 PROF_EVENT(MCPE_NEW_MEM_STACK_144
);
3471 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3472 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3473 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3474 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3475 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3476 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+32 );
3477 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+40 );
3478 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+48 );
3479 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+56 );
3480 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+64 );
3481 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+72 );
3482 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+80 );
3483 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+88 );
3484 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+96 );
3485 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+104 );
3486 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+112 );
3487 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+120 );
3488 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+128 );
3489 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+136 );
3491 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 144 );
3496 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP
)
3498 PROF_EVENT(MCPE_DIE_MEM_STACK_144
);
3499 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3500 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-144);
3501 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-136);
3502 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-128);
3503 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-120);
3504 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-112);
3505 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-104);
3506 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-96 );
3507 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-88 );
3508 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-80 );
3509 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-72 );
3510 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-64 );
3511 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-56 );
3512 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-48 );
3513 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-40 );
3514 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3515 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-24 );
3516 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3517 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
- 8 );
3519 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-144, 144 );
3523 /*--------------- adjustment by 160 bytes ---------------*/
3526 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP
, UInt ecu
)
3528 UInt otag
= ecu
| MC_OKIND_STACK
;
3529 PROF_EVENT(MCPE_NEW_MEM_STACK_160
);
3530 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3531 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3532 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8, otag
);
3533 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3534 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3535 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+32, otag
);
3536 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+40, otag
);
3537 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+48, otag
);
3538 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+56, otag
);
3539 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+64, otag
);
3540 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+72, otag
);
3541 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+80, otag
);
3542 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+88, otag
);
3543 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+96, otag
);
3544 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+104, otag
);
3545 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+112, otag
);
3546 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+120, otag
);
3547 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+128, otag
);
3548 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+136, otag
);
3549 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+144, otag
);
3550 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+152, otag
);
3552 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 160, otag
);
3557 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP
)
3559 PROF_EVENT(MCPE_NEW_MEM_STACK_160
);
3560 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3561 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3562 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3563 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3564 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3565 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+32 );
3566 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+40 );
3567 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+48 );
3568 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+56 );
3569 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+64 );
3570 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+72 );
3571 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+80 );
3572 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+88 );
3573 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+96 );
3574 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+104 );
3575 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+112 );
3576 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+120 );
3577 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+128 );
3578 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+136 );
3579 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+144 );
3580 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+152 );
3582 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 160 );
3587 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP
)
3589 PROF_EVENT(MCPE_DIE_MEM_STACK_160
);
3590 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3591 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-160);
3592 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-152);
3593 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-144);
3594 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-136);
3595 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-128);
3596 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-120);
3597 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-112);
3598 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-104);
3599 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-96 );
3600 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-88 );
3601 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-80 );
3602 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-72 );
3603 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-64 );
3604 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-56 );
3605 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-48 );
3606 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-40 );
3607 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3608 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-24 );
3609 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3610 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
- 8 );
3612 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-160, 160 );
3616 /*--------------- adjustment by N bytes ---------------*/
3618 static void mc_new_mem_stack_w_ECU ( Addr a
, SizeT len
, UInt ecu
)
3620 UInt otag
= ecu
| MC_OKIND_STACK
;
3621 PROF_EVENT(MCPE_NEW_MEM_STACK
);
3622 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ a
, len
, otag
);
3625 static void mc_new_mem_stack ( Addr a
, SizeT len
)
3627 PROF_EVENT(MCPE_NEW_MEM_STACK
);
3628 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ a
, len
);
3631 static void mc_die_mem_stack ( Addr a
, SizeT len
)
3633 PROF_EVENT(MCPE_DIE_MEM_STACK
);
3634 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ a
, len
);
/* The AMD64 ABI says:

   "The 128-byte area beyond the location pointed to by %rsp is considered
    to be reserved and shall not be modified by signal or interrupt
    handlers.  Therefore, functions may use this area for temporary data
    that is not needed across function calls.  In particular, leaf functions
    may use this area for their entire stack frame, rather than adjusting
    the stack pointer in the prologue and epilogue.  This area is known as
    the red zone."

   So after any call or return we need to mark this redzone as containing
   undefined values.

   Consider this: we're in function f.  f calls g.  g moves rsp down
   modestly (say 16 bytes) and writes stuff all over the red zone, making it
   defined.  g returns.  f is buggy and reads from parts of the red zone
   that it didn't write on.  But because g filled that area in, f is going
   to be picking up defined V bits and so any errors from reading bits of
   the red zone it didn't write, will be missed.  The only solution I could
   think of was to make the red zone undefined when g returns to f.

   This is in accordance with the ABI, which makes it clear the redzone
   is volatile across function calls.

   The problem occurs the other way round too: f could fill the RZ up
   with defined values and g could mistakenly read them.  So the RZ
   also needs to be nuked on function calls.
*/
3668 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3669 improved so as to have a lower miss rate. */
3671 static UWord stats__nia_cache_queries
= 0;
3672 static UWord stats__nia_cache_misses
= 0;
3675 struct { UWord nia0
; UWord ecu0
; /* nia0 maps to ecu0 */
3676 UWord nia1
; UWord ecu1
; } /* nia1 maps to ecu1 */
3679 #define N_NIA_TO_ECU_CACHE 511
3681 static WCacheEnt nia_to_ecu_cache
[N_NIA_TO_ECU_CACHE
];
3683 static void init_nia_to_ecu_cache ( void )
3687 ExeContext
* zero_ec
;
3689 /* Fill all the slots with an entry for address zero, and the
3690 relevant otags accordingly. Hence the cache is initially filled
3692 zero_ec
= VG_(make_depth_1_ExeContext_from_Addr
)(zero_addr
);
3694 zero_ecu
= VG_(get_ECU_from_ExeContext
)(zero_ec
);
3695 tl_assert(VG_(is_plausible_ECU
)(zero_ecu
));
3696 for (i
= 0; i
< N_NIA_TO_ECU_CACHE
; i
++) {
3697 nia_to_ecu_cache
[i
].nia0
= zero_addr
;
3698 nia_to_ecu_cache
[i
].ecu0
= zero_ecu
;
3699 nia_to_ecu_cache
[i
].nia1
= zero_addr
;
3700 nia_to_ecu_cache
[i
].ecu1
= zero_ecu
;
3704 static inline UInt
convert_nia_to_ecu ( Addr nia
)
3710 tl_assert( sizeof(nia_to_ecu_cache
[0].nia1
) == sizeof(nia
) );
3712 stats__nia_cache_queries
++;
3713 i
= nia
% N_NIA_TO_ECU_CACHE
;
3714 tl_assert(i
>= 0 && i
< N_NIA_TO_ECU_CACHE
);
3716 if (LIKELY( nia_to_ecu_cache
[i
].nia0
== nia
))
3717 return nia_to_ecu_cache
[i
].ecu0
;
3719 if (LIKELY( nia_to_ecu_cache
[i
].nia1
== nia
)) {
3720 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3721 SWAP( nia_to_ecu_cache
[i
].nia0
, nia_to_ecu_cache
[i
].nia1
);
3722 SWAP( nia_to_ecu_cache
[i
].ecu0
, nia_to_ecu_cache
[i
].ecu1
);
3724 return nia_to_ecu_cache
[i
].ecu0
;
3727 stats__nia_cache_misses
++;
3728 ec
= VG_(make_depth_1_ExeContext_from_Addr
)(nia
);
3730 ecu
= VG_(get_ECU_from_ExeContext
)(ec
);
3731 tl_assert(VG_(is_plausible_ECU
)(ecu
));
3733 nia_to_ecu_cache
[i
].nia1
= nia_to_ecu_cache
[i
].nia0
;
3734 nia_to_ecu_cache
[i
].ecu1
= nia_to_ecu_cache
[i
].ecu0
;
3736 nia_to_ecu_cache
[i
].nia0
= nia
;
3737 nia_to_ecu_cache
[i
].ecu0
= (UWord
)ecu
;
3742 /* This marks the stack as addressible but undefined, after a call or
3743 return for a target that has an ABI defined stack redzone. It
3744 happens quite a lot and needs to be fast. This is the version for
3745 origin tracking. The non-origin-tracking version is below. */
3747 void MC_(helperc_MAKE_STACK_UNINIT_w_o
) ( Addr base
, UWord len
, Addr nia
)
3749 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O
);
3751 VG_(printf
)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
3754 UInt ecu
= convert_nia_to_ecu ( nia
);
3755 tl_assert(VG_(is_plausible_ECU
)(ecu
));
3757 UInt otag
= ecu
| MC_OKIND_STACK
;
3760 /* Slow(ish) version, which is fairly easily seen to be correct.
3762 if (LIKELY( VG_IS_8_ALIGNED(base
) && len
==128 )) {
3763 make_aligned_word64_undefined_w_otag(base
+ 0, otag
);
3764 make_aligned_word64_undefined_w_otag(base
+ 8, otag
);
3765 make_aligned_word64_undefined_w_otag(base
+ 16, otag
);
3766 make_aligned_word64_undefined_w_otag(base
+ 24, otag
);
3768 make_aligned_word64_undefined_w_otag(base
+ 32, otag
);
3769 make_aligned_word64_undefined_w_otag(base
+ 40, otag
);
3770 make_aligned_word64_undefined_w_otag(base
+ 48, otag
);
3771 make_aligned_word64_undefined_w_otag(base
+ 56, otag
);
3773 make_aligned_word64_undefined_w_otag(base
+ 64, otag
);
3774 make_aligned_word64_undefined_w_otag(base
+ 72, otag
);
3775 make_aligned_word64_undefined_w_otag(base
+ 80, otag
);
3776 make_aligned_word64_undefined_w_otag(base
+ 88, otag
);
3778 make_aligned_word64_undefined_w_otag(base
+ 96, otag
);
3779 make_aligned_word64_undefined_w_otag(base
+ 104, otag
);
3780 make_aligned_word64_undefined_w_otag(base
+ 112, otag
);
3781 make_aligned_word64_undefined_w_otag(base
+ 120, otag
);
3783 MC_(make_mem_undefined_w_otag
)(base
, len
, otag
);
3787 /* Idea is: go fast when
3788 * 8-aligned and length is 128
3789 * the sm is available in the main primary map
3790 * the address range falls entirely with a single secondary map
3791 If all those conditions hold, just update the V+A bits by writing
3792 directly into the vabits array. (If the sm was distinguished, this
3793 will make a copy and then write to it.)
3795 if (LIKELY( len
== 128 && VG_IS_8_ALIGNED(base
) )) {
3796 /* Now we know the address range is suitably sized and aligned. */
3797 UWord a_lo
= (UWord
)(base
);
3798 UWord a_hi
= (UWord
)(base
+ 128 - 1);
3799 tl_assert(a_lo
< a_hi
); // paranoia: detect overflow
3800 if (LIKELY(a_hi
<= MAX_PRIMARY_ADDRESS
)) {
3801 /* Now we know the entire range is within the main primary map. */
3802 UWord pm_off_lo
= get_primary_map_low_offset(a_lo
);
3803 UWord pm_off_hi
= get_primary_map_low_offset(a_hi
);
3804 if (LIKELY(pm_off_lo
== pm_off_hi
)) {
3805 /* Now we know that the entire address range falls within a
3806 single secondary map, and that that secondary 'lives' in
3807 the main primary map. */
3808 SecMap
* sm
= get_secmap_for_writing_low(a_lo
);
3809 UWord v_off16
= SM_OFF_16(a_lo
);
3810 UShort
* p
= &sm
->vabits16
[v_off16
];
3811 p
[ 0] = VA_BITS16_UNDEFINED
;
3812 p
[ 1] = VA_BITS16_UNDEFINED
;
3813 p
[ 2] = VA_BITS16_UNDEFINED
;
3814 p
[ 3] = VA_BITS16_UNDEFINED
;
3815 p
[ 4] = VA_BITS16_UNDEFINED
;
3816 p
[ 5] = VA_BITS16_UNDEFINED
;
3817 p
[ 6] = VA_BITS16_UNDEFINED
;
3818 p
[ 7] = VA_BITS16_UNDEFINED
;
3819 p
[ 8] = VA_BITS16_UNDEFINED
;
3820 p
[ 9] = VA_BITS16_UNDEFINED
;
3821 p
[10] = VA_BITS16_UNDEFINED
;
3822 p
[11] = VA_BITS16_UNDEFINED
;
3823 p
[12] = VA_BITS16_UNDEFINED
;
3824 p
[13] = VA_BITS16_UNDEFINED
;
3825 p
[14] = VA_BITS16_UNDEFINED
;
3826 p
[15] = VA_BITS16_UNDEFINED
;
3827 set_aligned_word64_Origin_to_undef( base
+ 8 * 0, otag
);
3828 set_aligned_word64_Origin_to_undef( base
+ 8 * 1, otag
);
3829 set_aligned_word64_Origin_to_undef( base
+ 8 * 2, otag
);
3830 set_aligned_word64_Origin_to_undef( base
+ 8 * 3, otag
);
3831 set_aligned_word64_Origin_to_undef( base
+ 8 * 4, otag
);
3832 set_aligned_word64_Origin_to_undef( base
+ 8 * 5, otag
);
3833 set_aligned_word64_Origin_to_undef( base
+ 8 * 6, otag
);
3834 set_aligned_word64_Origin_to_undef( base
+ 8 * 7, otag
);
3835 set_aligned_word64_Origin_to_undef( base
+ 8 * 8, otag
);
3836 set_aligned_word64_Origin_to_undef( base
+ 8 * 9, otag
);
3837 set_aligned_word64_Origin_to_undef( base
+ 8 * 10, otag
);
3838 set_aligned_word64_Origin_to_undef( base
+ 8 * 11, otag
);
3839 set_aligned_word64_Origin_to_undef( base
+ 8 * 12, otag
);
3840 set_aligned_word64_Origin_to_undef( base
+ 8 * 13, otag
);
3841 set_aligned_word64_Origin_to_undef( base
+ 8 * 14, otag
);
3842 set_aligned_word64_Origin_to_undef( base
+ 8 * 15, otag
);
3848 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3849 if (LIKELY( len
== 288 && VG_IS_8_ALIGNED(base
) )) {
3850 /* Now we know the address range is suitably sized and aligned. */
3851 UWord a_lo
= (UWord
)(base
);
3852 UWord a_hi
= (UWord
)(base
+ 288 - 1);
3853 tl_assert(a_lo
< a_hi
); // paranoia: detect overflow
3854 if (a_hi
<= MAX_PRIMARY_ADDRESS
) {
3855 UWord pm_off_lo
= get_primary_map_low_offset(a_lo
);
3856 UWord pm_off_hi
= get_primary_map_low_offset(a_hi
);
3857 if (LIKELY(pm_off_lo
== pm_off_hi
)) {
3858 /* Now we know that the entire address range falls within a
3859 single secondary map, and that that secondary 'lives' in
3860 the main primary map. */
3861 SecMap
* sm
= get_secmap_for_writing_low(a_lo
);
3862 UWord v_off16
= SM_OFF_16(a_lo
);
3863 UShort
* p
= &sm
->vabits16
[v_off16
];
3864 p
[ 0] = VA_BITS16_UNDEFINED
;
3865 p
[ 1] = VA_BITS16_UNDEFINED
;
3866 p
[ 2] = VA_BITS16_UNDEFINED
;
3867 p
[ 3] = VA_BITS16_UNDEFINED
;
3868 p
[ 4] = VA_BITS16_UNDEFINED
;
3869 p
[ 5] = VA_BITS16_UNDEFINED
;
3870 p
[ 6] = VA_BITS16_UNDEFINED
;
3871 p
[ 7] = VA_BITS16_UNDEFINED
;
3872 p
[ 8] = VA_BITS16_UNDEFINED
;
3873 p
[ 9] = VA_BITS16_UNDEFINED
;
3874 p
[10] = VA_BITS16_UNDEFINED
;
3875 p
[11] = VA_BITS16_UNDEFINED
;
3876 p
[12] = VA_BITS16_UNDEFINED
;
3877 p
[13] = VA_BITS16_UNDEFINED
;
3878 p
[14] = VA_BITS16_UNDEFINED
;
3879 p
[15] = VA_BITS16_UNDEFINED
;
3880 p
[16] = VA_BITS16_UNDEFINED
;
3881 p
[17] = VA_BITS16_UNDEFINED
;
3882 p
[18] = VA_BITS16_UNDEFINED
;
3883 p
[19] = VA_BITS16_UNDEFINED
;
3884 p
[20] = VA_BITS16_UNDEFINED
;
3885 p
[21] = VA_BITS16_UNDEFINED
;
3886 p
[22] = VA_BITS16_UNDEFINED
;
3887 p
[23] = VA_BITS16_UNDEFINED
;
3888 p
[24] = VA_BITS16_UNDEFINED
;
3889 p
[25] = VA_BITS16_UNDEFINED
;
3890 p
[26] = VA_BITS16_UNDEFINED
;
3891 p
[27] = VA_BITS16_UNDEFINED
;
3892 p
[28] = VA_BITS16_UNDEFINED
;
3893 p
[29] = VA_BITS16_UNDEFINED
;
3894 p
[30] = VA_BITS16_UNDEFINED
;
3895 p
[31] = VA_BITS16_UNDEFINED
;
3896 p
[32] = VA_BITS16_UNDEFINED
;
3897 p
[33] = VA_BITS16_UNDEFINED
;
3898 p
[34] = VA_BITS16_UNDEFINED
;
3899 p
[35] = VA_BITS16_UNDEFINED
;
3900 set_aligned_word64_Origin_to_undef( base
+ 8 * 0, otag
);
3901 set_aligned_word64_Origin_to_undef( base
+ 8 * 1, otag
);
3902 set_aligned_word64_Origin_to_undef( base
+ 8 * 2, otag
);
3903 set_aligned_word64_Origin_to_undef( base
+ 8 * 3, otag
);
3904 set_aligned_word64_Origin_to_undef( base
+ 8 * 4, otag
);
3905 set_aligned_word64_Origin_to_undef( base
+ 8 * 5, otag
);
3906 set_aligned_word64_Origin_to_undef( base
+ 8 * 6, otag
);
3907 set_aligned_word64_Origin_to_undef( base
+ 8 * 7, otag
);
3908 set_aligned_word64_Origin_to_undef( base
+ 8 * 8, otag
);
3909 set_aligned_word64_Origin_to_undef( base
+ 8 * 9, otag
);
3910 set_aligned_word64_Origin_to_undef( base
+ 8 * 10, otag
);
3911 set_aligned_word64_Origin_to_undef( base
+ 8 * 11, otag
);
3912 set_aligned_word64_Origin_to_undef( base
+ 8 * 12, otag
);
3913 set_aligned_word64_Origin_to_undef( base
+ 8 * 13, otag
);
3914 set_aligned_word64_Origin_to_undef( base
+ 8 * 14, otag
);
3915 set_aligned_word64_Origin_to_undef( base
+ 8 * 15, otag
);
3916 set_aligned_word64_Origin_to_undef( base
+ 8 * 16, otag
);
3917 set_aligned_word64_Origin_to_undef( base
+ 8 * 17, otag
);
3918 set_aligned_word64_Origin_to_undef( base
+ 8 * 18, otag
);
3919 set_aligned_word64_Origin_to_undef( base
+ 8 * 19, otag
);
3920 set_aligned_word64_Origin_to_undef( base
+ 8 * 20, otag
);
3921 set_aligned_word64_Origin_to_undef( base
+ 8 * 21, otag
);
3922 set_aligned_word64_Origin_to_undef( base
+ 8 * 22, otag
);
3923 set_aligned_word64_Origin_to_undef( base
+ 8 * 23, otag
);
3924 set_aligned_word64_Origin_to_undef( base
+ 8 * 24, otag
);
3925 set_aligned_word64_Origin_to_undef( base
+ 8 * 25, otag
);
3926 set_aligned_word64_Origin_to_undef( base
+ 8 * 26, otag
);
3927 set_aligned_word64_Origin_to_undef( base
+ 8 * 27, otag
);
3928 set_aligned_word64_Origin_to_undef( base
+ 8 * 28, otag
);
3929 set_aligned_word64_Origin_to_undef( base
+ 8 * 29, otag
);
3930 set_aligned_word64_Origin_to_undef( base
+ 8 * 30, otag
);
3931 set_aligned_word64_Origin_to_undef( base
+ 8 * 31, otag
);
3932 set_aligned_word64_Origin_to_undef( base
+ 8 * 32, otag
);
3933 set_aligned_word64_Origin_to_undef( base
+ 8 * 33, otag
);
3934 set_aligned_word64_Origin_to_undef( base
+ 8 * 34, otag
);
3935 set_aligned_word64_Origin_to_undef( base
+ 8 * 35, otag
);
3941 /* else fall into slow case */
3942 MC_(make_mem_undefined_w_otag
)(base
, len
, otag
);
3946 /* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
3947 specialised for the non-origin-tracking case. */
3949 void MC_(helperc_MAKE_STACK_UNINIT_no_o
) ( Addr base
, UWord len
)
3951 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O
);
3953 VG_(printf
)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
3957 /* Slow(ish) version, which is fairly easily seen to be correct.
3959 if (LIKELY( VG_IS_8_ALIGNED(base
) && len
==128 )) {
3960 make_aligned_word64_undefined(base
+ 0);
3961 make_aligned_word64_undefined(base
+ 8);
3962 make_aligned_word64_undefined(base
+ 16);
3963 make_aligned_word64_undefined(base
+ 24);
3965 make_aligned_word64_undefined(base
+ 32);
3966 make_aligned_word64_undefined(base
+ 40);
3967 make_aligned_word64_undefined(base
+ 48);
3968 make_aligned_word64_undefined(base
+ 56);
3970 make_aligned_word64_undefined(base
+ 64);
3971 make_aligned_word64_undefined(base
+ 72);
3972 make_aligned_word64_undefined(base
+ 80);
3973 make_aligned_word64_undefined(base
+ 88);
3975 make_aligned_word64_undefined(base
+ 96);
3976 make_aligned_word64_undefined(base
+ 104);
3977 make_aligned_word64_undefined(base
+ 112);
3978 make_aligned_word64_undefined(base
+ 120);
3980 make_mem_undefined(base
, len
);
3984 /* Idea is: go fast when
3985 * 8-aligned and length is 128
3986 * the sm is available in the main primary map
3987 * the address range falls entirely with a single secondary map
3988 If all those conditions hold, just update the V+A bits by writing
3989 directly into the vabits array. (If the sm was distinguished, this
3990 will make a copy and then write to it.)
3992 if (LIKELY( len
== 128 && VG_IS_8_ALIGNED(base
) )) {
3993 /* Now we know the address range is suitably sized and aligned. */
3994 UWord a_lo
= (UWord
)(base
);
3995 UWord a_hi
= (UWord
)(base
+ 128 - 1);
3996 tl_assert(a_lo
< a_hi
); // paranoia: detect overflow
3997 if (LIKELY(a_hi
<= MAX_PRIMARY_ADDRESS
)) {
3998 /* Now we know the entire range is within the main primary map. */
3999 UWord pm_off_lo
= get_primary_map_low_offset(a_lo
);
4000 UWord pm_off_hi
= get_primary_map_low_offset(a_hi
);
4001 if (LIKELY(pm_off_lo
== pm_off_hi
)) {
4002 /* Now we know that the entire address range falls within a
4003 single secondary map, and that that secondary 'lives' in
4004 the main primary map. */
4005 SecMap
* sm
= get_secmap_for_writing_low(a_lo
);
4006 UWord v_off16
= SM_OFF_16(a_lo
);
4007 UShort
* p
= &sm
->vabits16
[v_off16
];
4008 p
[ 0] = VA_BITS16_UNDEFINED
;
4009 p
[ 1] = VA_BITS16_UNDEFINED
;
4010 p
[ 2] = VA_BITS16_UNDEFINED
;
4011 p
[ 3] = VA_BITS16_UNDEFINED
;
4012 p
[ 4] = VA_BITS16_UNDEFINED
;
4013 p
[ 5] = VA_BITS16_UNDEFINED
;
4014 p
[ 6] = VA_BITS16_UNDEFINED
;
4015 p
[ 7] = VA_BITS16_UNDEFINED
;
4016 p
[ 8] = VA_BITS16_UNDEFINED
;
4017 p
[ 9] = VA_BITS16_UNDEFINED
;
4018 p
[10] = VA_BITS16_UNDEFINED
;
4019 p
[11] = VA_BITS16_UNDEFINED
;
4020 p
[12] = VA_BITS16_UNDEFINED
;
4021 p
[13] = VA_BITS16_UNDEFINED
;
4022 p
[14] = VA_BITS16_UNDEFINED
;
4023 p
[15] = VA_BITS16_UNDEFINED
;
4029 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
4030 if (LIKELY( len
== 288 && VG_IS_8_ALIGNED(base
) )) {
4031 /* Now we know the address range is suitably sized and aligned. */
4032 UWord a_lo
= (UWord
)(base
);
4033 UWord a_hi
= (UWord
)(base
+ 288 - 1);
4034 tl_assert(a_lo
< a_hi
); // paranoia: detect overflow
4035 if (a_hi
<= MAX_PRIMARY_ADDRESS
) {
4036 UWord pm_off_lo
= get_primary_map_low_offset(a_lo
);
4037 UWord pm_off_hi
= get_primary_map_low_offset(a_hi
);
4038 if (LIKELY(pm_off_lo
== pm_off_hi
)) {
4039 /* Now we know that the entire address range falls within a
4040 single secondary map, and that that secondary 'lives' in
4041 the main primary map. */
4042 SecMap
* sm
= get_secmap_for_writing_low(a_lo
);
4043 UWord v_off16
= SM_OFF_16(a_lo
);
4044 UShort
* p
= &sm
->vabits16
[v_off16
];
4045 p
[ 0] = VA_BITS16_UNDEFINED
;
4046 p
[ 1] = VA_BITS16_UNDEFINED
;
4047 p
[ 2] = VA_BITS16_UNDEFINED
;
4048 p
[ 3] = VA_BITS16_UNDEFINED
;
4049 p
[ 4] = VA_BITS16_UNDEFINED
;
4050 p
[ 5] = VA_BITS16_UNDEFINED
;
4051 p
[ 6] = VA_BITS16_UNDEFINED
;
4052 p
[ 7] = VA_BITS16_UNDEFINED
;
4053 p
[ 8] = VA_BITS16_UNDEFINED
;
4054 p
[ 9] = VA_BITS16_UNDEFINED
;
4055 p
[10] = VA_BITS16_UNDEFINED
;
4056 p
[11] = VA_BITS16_UNDEFINED
;
4057 p
[12] = VA_BITS16_UNDEFINED
;
4058 p
[13] = VA_BITS16_UNDEFINED
;
4059 p
[14] = VA_BITS16_UNDEFINED
;
4060 p
[15] = VA_BITS16_UNDEFINED
;
4061 p
[16] = VA_BITS16_UNDEFINED
;
4062 p
[17] = VA_BITS16_UNDEFINED
;
4063 p
[18] = VA_BITS16_UNDEFINED
;
4064 p
[19] = VA_BITS16_UNDEFINED
;
4065 p
[20] = VA_BITS16_UNDEFINED
;
4066 p
[21] = VA_BITS16_UNDEFINED
;
4067 p
[22] = VA_BITS16_UNDEFINED
;
4068 p
[23] = VA_BITS16_UNDEFINED
;
4069 p
[24] = VA_BITS16_UNDEFINED
;
4070 p
[25] = VA_BITS16_UNDEFINED
;
4071 p
[26] = VA_BITS16_UNDEFINED
;
4072 p
[27] = VA_BITS16_UNDEFINED
;
4073 p
[28] = VA_BITS16_UNDEFINED
;
4074 p
[29] = VA_BITS16_UNDEFINED
;
4075 p
[30] = VA_BITS16_UNDEFINED
;
4076 p
[31] = VA_BITS16_UNDEFINED
;
4077 p
[32] = VA_BITS16_UNDEFINED
;
4078 p
[33] = VA_BITS16_UNDEFINED
;
4079 p
[34] = VA_BITS16_UNDEFINED
;
4080 p
[35] = VA_BITS16_UNDEFINED
;
4086 /* else fall into slow case */
4087 make_mem_undefined(base
, len
);
4091 /* And this is an even more specialised case, for the case where there
4092 is no origin tracking, and the length is 128. */
4094 void MC_(helperc_MAKE_STACK_UNINIT_128_no_o
) ( Addr base
)
4096 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O
);
4098 VG_(printf
)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base
);
4101 /* Slow(ish) version, which is fairly easily seen to be correct.
4103 if (LIKELY( VG_IS_8_ALIGNED(base
) )) {
4104 make_aligned_word64_undefined(base
+ 0);
4105 make_aligned_word64_undefined(base
+ 8);
4106 make_aligned_word64_undefined(base
+ 16);
4107 make_aligned_word64_undefined(base
+ 24);
4109 make_aligned_word64_undefined(base
+ 32);
4110 make_aligned_word64_undefined(base
+ 40);
4111 make_aligned_word64_undefined(base
+ 48);
4112 make_aligned_word64_undefined(base
+ 56);
4114 make_aligned_word64_undefined(base
+ 64);
4115 make_aligned_word64_undefined(base
+ 72);
4116 make_aligned_word64_undefined(base
+ 80);
4117 make_aligned_word64_undefined(base
+ 88);
4119 make_aligned_word64_undefined(base
+ 96);
4120 make_aligned_word64_undefined(base
+ 104);
4121 make_aligned_word64_undefined(base
+ 112);
4122 make_aligned_word64_undefined(base
+ 120);
4124 make_mem_undefined(base
, 128);
4128 /* Idea is: go fast when
4129 * 16-aligned and length is 128
4130 * the sm is available in the main primary map
4131 * the address range falls entirely with a single secondary map
4132 If all those conditions hold, just update the V+A bits by writing
4133 directly into the vabits array. (If the sm was distinguished, this
4134 will make a copy and then write to it.)
4136 Typically this applies to amd64 'ret' instructions, since RSP is
4137 16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
4139 if (LIKELY( VG_IS_16_ALIGNED(base
) )) {
4140 /* Now we know the address range is suitably sized and aligned. */
4141 UWord a_lo
= (UWord
)(base
);
4142 UWord a_hi
= (UWord
)(base
+ 128 - 1);
4143 /* FIXME: come up with a sane story on the wraparound case
4144 (which of course cnanot happen, but still..) */
4145 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
4146 if (LIKELY(a_hi
<= MAX_PRIMARY_ADDRESS
)) {
4147 /* Now we know the entire range is within the main primary map. */
4148 UWord pm_off_lo
= get_primary_map_low_offset(a_lo
);
4149 UWord pm_off_hi
= get_primary_map_low_offset(a_hi
);
4150 if (LIKELY(pm_off_lo
== pm_off_hi
)) {
4151 /* Now we know that the entire address range falls within a
4152 single secondary map, and that that secondary 'lives' in
4153 the main primary map. */
4154 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16
);
4155 SecMap
* sm
= get_secmap_for_writing_low(a_lo
);
4156 UWord v_off
= SM_OFF(a_lo
);
4157 UInt
* w32
= ASSUME_ALIGNED(UInt
*, &sm
->vabits8
[v_off
]);
4158 w32
[ 0] = VA_BITS32_UNDEFINED
;
4159 w32
[ 1] = VA_BITS32_UNDEFINED
;
4160 w32
[ 2] = VA_BITS32_UNDEFINED
;
4161 w32
[ 3] = VA_BITS32_UNDEFINED
;
4162 w32
[ 4] = VA_BITS32_UNDEFINED
;
4163 w32
[ 5] = VA_BITS32_UNDEFINED
;
4164 w32
[ 6] = VA_BITS32_UNDEFINED
;
4165 w32
[ 7] = VA_BITS32_UNDEFINED
;
4171 /* The same, but for when base is 8 % 16, which is the situation
4172 with RSP for amd64-ELF immediately after call instructions.
4174 if (LIKELY( VG_IS_16_ALIGNED(base
+8) )) { // restricts to 8 aligned
4175 /* Now we know the address range is suitably sized and aligned. */
4176 UWord a_lo
= (UWord
)(base
);
4177 UWord a_hi
= (UWord
)(base
+ 128 - 1);
4178 /* FIXME: come up with a sane story on the wraparound case
4179 (which of course cnanot happen, but still..) */
4180 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
4181 if (LIKELY(a_hi
<= MAX_PRIMARY_ADDRESS
)) {
4182 /* Now we know the entire range is within the main primary map. */
4183 UWord pm_off_lo
= get_primary_map_low_offset(a_lo
);
4184 UWord pm_off_hi
= get_primary_map_low_offset(a_hi
);
4185 if (LIKELY(pm_off_lo
== pm_off_hi
)) {
4186 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8
);
4187 /* Now we know that the entire address range falls within a
4188 single secondary map, and that that secondary 'lives' in
4189 the main primary map. */
4190 SecMap
* sm
= get_secmap_for_writing_low(a_lo
);
4191 UWord v_off16
= SM_OFF_16(a_lo
);
4192 UShort
* w16
= &sm
->vabits16
[v_off16
];
4193 UInt
* w32
= ASSUME_ALIGNED(UInt
*, &w16
[1]);
4194 /* The following assertion is commented out for obvious
4195 performance reasons, but was verified as valid when
4196 running the entire testsuite and also Firefox. */
4197 /* tl_assert(VG_IS_4_ALIGNED(w32)); */
4198 w16
[ 0] = VA_BITS16_UNDEFINED
; // w16[0]
4199 w32
[ 0] = VA_BITS32_UNDEFINED
; // w16[1,2]
4200 w32
[ 1] = VA_BITS32_UNDEFINED
; // w16[3,4]
4201 w32
[ 2] = VA_BITS32_UNDEFINED
; // w16[5,6]
4202 w32
[ 3] = VA_BITS32_UNDEFINED
; // w16[7,8]
4203 w32
[ 4] = VA_BITS32_UNDEFINED
; // w16[9,10]
4204 w32
[ 5] = VA_BITS32_UNDEFINED
; // w16[11,12]
4205 w32
[ 6] = VA_BITS32_UNDEFINED
; // w16[13,14]
4206 w16
[15] = VA_BITS16_UNDEFINED
; // w16[15]
4212 /* else fall into slow case */
4213 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE
);
4214 make_mem_undefined(base
, 128);
4218 /*------------------------------------------------------------*/
4219 /*--- Checking memory ---*/
4220 /*------------------------------------------------------------*/
/* Check permissions for address range.  If inadequate permissions
   exist, *bad_addr is set to the offending address, so the caller can
   know what it is. */
4235 /* Returns True if [a .. a+len) is not addressible. Otherwise,
4236 returns False, and if bad_addr is non-NULL, sets *bad_addr to
4237 indicate the lowest failing address. Functions below are
4239 Bool
MC_(check_mem_is_noaccess
) ( Addr a
, SizeT len
, Addr
* bad_addr
)
4244 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS
);
4245 for (i
= 0; i
< len
; i
++) {
4246 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP
);
4247 vabits2
= get_vabits2(a
);
4248 if (VA_BITS2_NOACCESS
!= vabits2
) {
4249 if (bad_addr
!= NULL
) *bad_addr
= a
;
4257 static Bool
is_mem_addressable ( Addr a
, SizeT len
,
4258 /*OUT*/Addr
* bad_addr
)
4263 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE
);
4264 for (i
= 0; i
< len
; i
++) {
4265 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP
);
4266 vabits2
= get_vabits2(a
);
4267 if (VA_BITS2_NOACCESS
== vabits2
) {
4268 if (bad_addr
!= NULL
) *bad_addr
= a
;
4276 static MC_ReadResult
is_mem_defined ( Addr a
, SizeT len
,
4277 /*OUT*/Addr
* bad_addr
,
4283 PROF_EVENT(MCPE_IS_MEM_DEFINED
);
4284 DEBUG("is_mem_defined\n");
4286 if (otag
) *otag
= 0;
4287 if (bad_addr
) *bad_addr
= 0;
4288 for (i
= 0; i
< len
; i
++) {
4289 PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP
);
4290 vabits2
= get_vabits2(a
);
4291 if (VA_BITS2_DEFINED
!= vabits2
) {
4292 // Error! Nb: Report addressability errors in preference to
4293 // definedness errors. And don't report definedeness errors unless
4294 // --undef-value-errors=yes.
4298 if (VA_BITS2_NOACCESS
== vabits2
) {
4301 if (MC_(clo_mc_level
) >= 2) {
4302 if (otag
&& MC_(clo_mc_level
) == 3) {
4303 *otag
= MC_(helperc_b_load1
)( a
);
4314 /* Like is_mem_defined but doesn't give up at the first uninitialised
4315 byte -- the entire range is always checked. This is important for
4316 detecting errors in the case where a checked range strays into
4317 invalid memory, but that fact is not detected by the ordinary
4318 is_mem_defined(), because of an undefined section that precedes the
4319 out of range section, possibly as a result of an alignment hole in
4320 the checked data. This version always checks the entire range and
4321 can report both a definedness and an accessbility error, if
4323 static void is_mem_defined_comprehensive (
4325 /*OUT*/Bool
* errorV
, /* is there a definedness err? */
4326 /*OUT*/Addr
* bad_addrV
, /* if so where? */
4327 /*OUT*/UInt
* otagV
, /* and what's its otag? */
4328 /*OUT*/Bool
* errorA
, /* is there an addressability err? */
4329 /*OUT*/Addr
* bad_addrA
/* if so where? */
4334 Bool already_saw_errV
= False
;
4336 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE
);
4337 DEBUG("is_mem_defined_comprehensive\n");
4339 tl_assert(!(*errorV
|| *errorA
));
4341 for (i
= 0; i
< len
; i
++) {
4342 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP
);
4343 vabits2
= get_vabits2(a
);
4345 case VA_BITS2_DEFINED
:
4348 case VA_BITS2_UNDEFINED
:
4349 case VA_BITS2_PARTDEFINED
:
4350 if (!already_saw_errV
) {
4353 if (MC_(clo_mc_level
) == 3) {
4354 *otagV
= MC_(helperc_b_load1
)( a
);
4358 already_saw_errV
= True
;
4360 a
++; /* keep going */
4362 case VA_BITS2_NOACCESS
:
4365 return; /* give up now. */
4373 /* Check a zero-terminated ascii string. Tricky -- don't want to
4374 examine the actual bytes, to find the end, until we're sure it is
4377 static Bool
mc_is_defined_asciiz ( Addr a
, Addr
* bad_addr
, UInt
* otag
)
4381 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ
);
4382 DEBUG("mc_is_defined_asciiz\n");
4384 if (otag
) *otag
= 0;
4385 if (bad_addr
) *bad_addr
= 0;
4387 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP
);
4388 vabits2
= get_vabits2(a
);
4389 if (VA_BITS2_DEFINED
!= vabits2
) {
4390 // Error! Nb: Report addressability errors in preference to
4391 // definedness errors. And don't report definedeness errors unless
4392 // --undef-value-errors=yes.
4396 if (VA_BITS2_NOACCESS
== vabits2
) {
4399 if (MC_(clo_mc_level
) >= 2) {
4400 if (otag
&& MC_(clo_mc_level
) == 3) {
4401 *otag
= MC_(helperc_b_load1
)( a
);
4406 /* Ok, a is safe to read. */
4407 if (* ((UChar
*)a
) == 0) {
4415 /*------------------------------------------------------------*/
4416 /*--- Memory event handlers ---*/
4417 /*------------------------------------------------------------*/
4420 void check_mem_is_addressable ( CorePart part
, ThreadId tid
, const HChar
* s
,
4421 Addr base
, SizeT size
)
4424 Bool ok
= is_mem_addressable ( base
, size
, &bad_addr
);
4428 case Vg_CoreSysCall
:
4429 MC_(record_memparam_error
) ( tid
, bad_addr
,
4430 /*isAddrErr*/True
, s
, 0/*otag*/ );
4434 MC_(record_core_mem_error
)( tid
, s
);
4438 VG_(tool_panic
)("check_mem_is_addressable: unexpected CorePart");
4444 void check_mem_is_defined ( CorePart part
, ThreadId tid
, const HChar
* s
,
4445 Addr base
, SizeT size
)
4449 MC_ReadResult res
= is_mem_defined ( base
, size
, &bad_addr
, &otag
);
4452 Bool isAddrErr
= ( MC_AddrErr
== res
? True
: False
);
4455 case Vg_CoreSysCall
:
4456 MC_(record_memparam_error
) ( tid
, bad_addr
, isAddrErr
, s
,
4457 isAddrErr
? 0 : otag
);
4460 case Vg_CoreSysCallArgInMem
:
4461 MC_(record_regparam_error
) ( tid
, s
, otag
);
4464 /* If we're being asked to jump to a silly address, record an error
4465 message before potentially crashing the entire system. */
4466 case Vg_CoreTranslate
:
4467 MC_(record_jump_error
)( tid
, bad_addr
);
4471 VG_(tool_panic
)("check_mem_is_defined: unexpected CorePart");
4477 void check_mem_is_defined_asciiz ( CorePart part
, ThreadId tid
,
4478 const HChar
* s
, Addr str
)
4481 Addr bad_addr
= 0; // shut GCC up
4484 tl_assert(part
== Vg_CoreSysCall
);
4485 res
= mc_is_defined_asciiz ( (Addr
)str
, &bad_addr
, &otag
);
4487 Bool isAddrErr
= ( MC_AddrErr
== res
? True
: False
);
4488 MC_(record_memparam_error
) ( tid
, bad_addr
, isAddrErr
, s
,
4489 isAddrErr
? 0 : otag
);
4493 /* Handling of mmap and mprotect is not as simple as it seems.
4495 The underlying semantics are that memory obtained from mmap is
4496 always initialised, but may be inaccessible. And changes to the
4497 protection of memory do not change its contents and hence not its
4498 definedness state. Problem is we can't model
4499 inaccessible-but-with-some-definedness state; once we mark memory
4500 as inaccessible we lose all info about definedness, and so can't
4501 restore that if it is later made accessible again.
4503 One obvious thing to do is this:
4505 mmap/mprotect NONE -> noaccess
4506 mmap/mprotect other -> defined
4508 The problem case here is: taking accessible memory, writing
4509 uninitialised data to it, mprotecting it NONE and later mprotecting
4510 it back to some accessible state causes the undefinedness to be
4513 A better proposal is:
4515 (1) mmap NONE -> make noaccess
4516 (2) mmap other -> make defined
4518 (3) mprotect NONE -> # no change
4519 (4) mprotect other -> change any "noaccess" to "defined"
4521 (2) is OK because memory newly obtained from mmap really is defined
4522 (zeroed out by the kernel -- doing anything else would
4523 constitute a massive security hole.)
4525 (1) is OK because the only way to make the memory usable is via
4526 (4), in which case we also wind up correctly marking it all as
4529 (3) is the weak case. We choose not to change memory state.
4530 (presumably the range is in some mixture of "defined" and
4531 "undefined", viz, accessible but with arbitrary V bits). Doing
4532 nothing means we retain the V bits, so that if the memory is
4533 later mprotected "other", the V bits remain unchanged, so there
4534 can be no false negatives. The bad effect is that if there's
4535 an access in the area, then MC cannot warn; but at least we'll
4536 get a SEGV to show, so it's better than nothing.
4538 Consider the sequence (3) followed by (4). Any memory that was
4539 "defined" or "undefined" previously retains its state (as
4540 required). Any memory that was "noaccess" before can only have
4541 been made that way by (1), and so it's OK to change it to
4544 See https://bugs.kde.org/show_bug.cgi?id=205541
4545 and https://bugs.kde.org/show_bug.cgi?id=210268
4548 void mc_new_mem_mmap ( Addr a
, SizeT len
, Bool rr
, Bool ww
, Bool xx
,
4551 if (rr
|| ww
|| xx
) {
4552 /* (2) mmap/mprotect other -> defined */
4553 MC_(make_mem_defined
)(a
, len
);
4555 /* (1) mmap/mprotect NONE -> noaccess */
4556 MC_(make_mem_noaccess
)(a
, len
);
/* Handler for a protection change on [a, a+len).  When any of rr, ww or
   xx is set, the range is passed to make_mem_defined_if_noaccess (rule
   (4) of the mmap/mprotect scheme); the no-permission case is rule (3),
   for which the only visible action is the "no change" note. */
4561 void mc_new_mem_mprotect ( Addr a
, SizeT len
, Bool rr
, Bool ww
, Bool xx
)
4563 if (rr
|| ww
|| xx
) {
4564 /* (4) mprotect other -> change any "noaccess" to "defined" */
4565 make_mem_defined_if_noaccess(a
, len
);
4567 /* (3) mprotect NONE -> # no change */
/* Handler for a mapping [a, a+len) that exists at startup.  Emits a
   DEBUG trace of the address, length and the three permission flags,
   then forwards every argument, including di_handle, unchanged to
   mc_new_mem_mmap, so startup mappings get the same treatment as a
   fresh mmap. */
4574 void mc_new_mem_startup( Addr a
, SizeT len
,
4575 Bool rr
, Bool ww
, Bool xx
, ULong di_handle
)
4577 // Because code is defined, initialised variables get put in the data
4578 // segment and are defined, and uninitialised variables get put in the
4579 // bss segment and are auto-zeroed (and so defined).
4581 // It's possible that there will be padding between global variables.
4582 // This will also be auto-zeroed, and marked as defined by Memcheck. If
4583 // a program uses it, Memcheck will not complain. This is arguably a
4584 // false negative, but it's a grey area -- the behaviour is defined (the
4585 // padding is zeroed) but it's probably not what the user intended. And
4586 // we can't avoid it.
4588 // Note: we generally ignore RWX permissions, because we can't track them
4589 // without requiring more than one A bit which would slow things down a
4590 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
4591 // So we mark any such pages as "unaddressable".
4592 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4593 a
, (ULong
)len
, rr
, ww
, xx
);
4594 mc_new_mem_mmap(a
, len
, rr
, ww
, xx
, di_handle
);
/* Post-write handler: marks [a, a+len) as defined by calling
   MC_(make_mem_defined).  The part and tid arguments are not used by
   the visible statement. */
4598 void mc_post_mem_write(CorePart part
, ThreadId tid
, Addr a
, SizeT len
)
4600 MC_(make_mem_defined
)(a
, len
);
4604 /*------------------------------------------------------------*/
4605 /*--- Register event handlers ---*/
4606 /*------------------------------------------------------------*/
4608 /* Try and get a nonzero origin for the guest state section of thread
4609 tid characterised by (offset,size). Return 0 if nothing to show
4611 static UInt
mb_get_origin_for_guest_offset ( ThreadId tid
,
4612 Int offset
, SizeT size
)
4617 sh2off
= MC_(get_otrack_shadow_offset
)( offset
, size
);
4619 return 0; /* This piece of guest state is not tracked */
4620 tl_assert(sh2off
>= 0);
4621 tl_assert(0 == (sh2off
% 4));
4622 area
[0] = 0x31313131;
4623 area
[2] = 0x27272727;
4624 VG_(get_shadow_regs_area
)( tid
, (UChar
*)&area
[1], 2/*shadowno*/,sh2off
,4 );
4625 tl_assert(area
[0] == 0x31313131);
4626 tl_assert(area
[2] == 0x27272727);
4632 /* When some chunk of guest state is written, mark the corresponding
4633 shadow area as valid. This is used to initialise arbitrarily large
4634 chunks of guest state, hence the _SIZE value, which has to be as
4635 big as the biggest guest state.
/* Marks `size` bytes of thread tid's guest state at `offset` as defined.
   A local buffer of MAX_REG_WRITE_SIZE (2264) bytes is filled with
   V_BITS8_DEFINED for the first `size` bytes and written to shadow
   area 1 via VG_(set_shadow_regs_area).  The tl_assert guards the
   buffer: size must not exceed MAX_REG_WRITE_SIZE.  `part` is not used
   by the visible statements. */
4637 static void mc_post_reg_write ( CorePart part
, ThreadId tid
,
4638 PtrdiffT offset
, SizeT size
)
4640 # define MAX_REG_WRITE_SIZE 2264
4641 UChar area
[MAX_REG_WRITE_SIZE
];
4642 tl_assert(size
<= MAX_REG_WRITE_SIZE
);
4643 VG_(memset
)(area
, V_BITS8_DEFINED
, size
);
4644 VG_(set_shadow_regs_area
)( tid
, 1/*shadowNo*/,offset
,size
, area
);
4645 # undef MAX_REG_WRITE_SIZE
/* Client-call flavour of the register post-write handler: forwards
   tid, offset and size to mc_post_reg_write with a dummy CorePart of 0.
   The function address f is not used by the visible statement. */
4649 void mc_post_reg_write_clientcall ( ThreadId tid
,
4650 PtrdiffT offset
, SizeT size
, Addr f
)
4652 mc_post_reg_write(/*dummy*/0, tid
, offset
, size
);
4655 /* Look at the definedness of the guest's shadow state for
4656 [offset, offset+len). If any part of that is undefined, record
4659 static void mc_pre_reg_read ( CorePart part
, ThreadId tid
, const HChar
* s
,
4660 PtrdiffT offset
, SizeT size
)
4667 tl_assert(size
<= 16);
4669 VG_(get_shadow_regs_area
)( tid
, area
, 1/*shadowNo*/,offset
,size
);
4672 for (i
= 0; i
< size
; i
++) {
4673 if (area
[i
] != V_BITS8_DEFINED
) {
4682 /* We've found some undefinedness. See if we can also find an
4684 otag
= mb_get_origin_for_guest_offset( tid
, offset
, size
);
4685 MC_(record_regparam_error
) ( tid
, s
, otag
);
4689 /*------------------------------------------------------------*/
4690 /*--- Register-memory event handlers ---*/
4691 /*------------------------------------------------------------*/
4693 static void mc_copy_mem_to_reg ( CorePart part
, ThreadId tid
, Addr a
,
4694 PtrdiffT guest_state_offset
, SizeT size
)
4702 for (i
= 0; i
< size
; i
++) {
4703 get_vbits8( a
+i
, &vbits8
);
4704 VG_(set_shadow_regs_area
)( tid
, 1/*shadowNo*/, guest_state_offset
+i
,
4708 if (MC_(clo_mc_level
) != 3)
4711 /* Track origins. */
4712 offset
= MC_(get_otrack_shadow_offset
)( guest_state_offset
, size
);
4718 d32
= MC_(helperc_b_load1
)( a
);
4721 d32
= MC_(helperc_b_load2
)( a
);
4724 d32
= MC_(helperc_b_load4
)( a
);
4727 d32
= MC_(helperc_b_load8
)( a
);
4730 d32
= MC_(helperc_b_load16
)( a
);
4733 d32
= MC_(helperc_b_load32
)( a
);
4739 VG_(set_shadow_regs_area
)( tid
, 2/*shadowNo*/, offset
, 4, (UChar
*)&d32
);
4742 static void mc_copy_reg_to_mem ( CorePart part
, ThreadId tid
,
4743 PtrdiffT guest_state_offset
, Addr a
,
4752 for (i
= 0; i
< size
; i
++) {
4753 VG_(get_shadow_regs_area
)( tid
, &vbits8
, 1/*shadowNo*/,
4754 guest_state_offset
+i
, 1 );
4755 set_vbits8( a
+i
, vbits8
);
4758 if (MC_(clo_mc_level
) != 3)
4761 /* Track origins. */
4762 offset
= MC_(get_otrack_shadow_offset
)( guest_state_offset
, size
);
4766 VG_(get_shadow_regs_area
)( tid
, (UChar
*)&d32
, 2/*shadowNo*/, offset
, 4 );
4769 MC_(helperc_b_store1
)( a
, d32
);
4772 MC_(helperc_b_store2
)( a
, d32
);
4775 MC_(helperc_b_store4
)( a
, d32
);
4778 MC_(helperc_b_store8
)( a
, d32
);
4781 MC_(helperc_b_store16
)( a
, d32
);
4784 MC_(helperc_b_store32
)( a
, d32
);
4792 /*------------------------------------------------------------*/
4793 /*--- Some static assertions ---*/
4794 /*------------------------------------------------------------*/
4796 /* The handwritten assembly helpers below have baked-in assumptions
4797 about various constant values. These assertions attempt to make
4798 that a bit safer by checking those values and flagging changes that
4799 would make the assembly invalid. Not perfect but it's better than
/* Compile-time pins for the constants that the hand-written assembly
   load helpers compare against or return as immediates. */
/* NOTE(review): presumably reflects one secondary-map byte per 4 bytes
   of a 64KB chunk (the asm indexes with "offset >> 2") -- confirm. */
4802 STATIC_ASSERT(SM_CHUNKS
* 4 == 65536);
/* Compact per-4-byte encodings; the asm compares against 0xAA / 0x55. */
4804 STATIC_ASSERT(VA_BITS8_DEFINED
== 0xAA);
4805 STATIC_ASSERT(VA_BITS8_UNDEFINED
== 0x55);
/* Expanded 32-bit V-bit values. */
4807 STATIC_ASSERT(V_BITS32_DEFINED
== 0x00000000);
4808 STATIC_ASSERT(V_BITS32_UNDEFINED
== 0xFFFFFFFF);
/* Compact per-2-byte encodings; the 16-bit asm compares against 0xA / 0x5. */
4810 STATIC_ASSERT(VA_BITS4_DEFINED
== 0xA);
4811 STATIC_ASSERT(VA_BITS4_UNDEFINED
== 0x5);
/* Expanded 16-bit V-bit values (low half of the asm return immediates). */
4813 STATIC_ASSERT(V_BITS16_DEFINED
== 0x0000);
4814 STATIC_ASSERT(V_BITS16_UNDEFINED
== 0xFFFF);
/* Compact per-byte encodings; the 8-bit asm compares against 2 / 1. */
4816 STATIC_ASSERT(VA_BITS2_DEFINED
== 2);
4817 STATIC_ASSERT(VA_BITS2_UNDEFINED
== 1);
/* Expanded 8-bit V-bit values (low byte of the asm return immediates). */
4819 STATIC_ASSERT(V_BITS8_DEFINED
== 0x00);
4820 STATIC_ASSERT(V_BITS8_UNDEFINED
== 0xFF);
4823 /*------------------------------------------------------------*/
4824 /*--- Functions called directly from generated code: ---*/
4825 /*--- Load/store handlers. ---*/
4826 /*------------------------------------------------------------*/
4828 /* Types: LOADV32, LOADV16, LOADV8 are:
4830 so they return 32-bits on 32-bit machines and 64-bits on
4831 64-bit machines. Addr has the same size as a host word.
4833 LOADV64 is always ULong fn ( Addr a )
4835 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4836 are a UWord, and for STOREV64 they are a ULong.
4839 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4840 naturally '_sz/8'-aligned, or it exceeds the range covered by the
4841 primary map. This is all very tricky (and important!), so let's
4842 work through the maths by hand (below), *and* assert for these
4843 values at startup. */
/* MASK(_szInBytes): bit mask with a 1 in every address bit that must be
   zero for the fast path, i.e. the low alignment bits for an access of
   _szInBytes bytes plus every bit above the range covered by the
   primary map (N_PRIMARY_MAP entries of 64KB each). */
#define MASK(_szInBytes) \
   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )

/* MASK only exists so as to define this macro.
   UNALIGNED_OR_HIGH(_a,_szInBits) is nonzero iff address _a is not
   naturally aligned for an access of _szInBits bits, or lies beyond the
   primary map.  Both arguments are fully parenthesized so that any
   expression may be passed for them. */
#define UNALIGNED_OR_HIGH(_a,_szInBits) \
   ((_a) & MASK(((_szInBits)>>3)))
4851 /* On a 32-bit machine:
4853 N_PRIMARY_BITS == 16, so
4854 N_PRIMARY_MAP == 0x10000, so
4855 N_PRIMARY_MAP-1 == 0xFFFF, so
4856 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4858 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4859 = ~ ( 0xFFFF | 0xFFFF0000 )
4863 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4864 = ~ ( 0xFFFE | 0xFFFF0000 )
4868 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4869 = ~ ( 0xFFFC | 0xFFFF0000 )
4873 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4874 = ~ ( 0xFFF8 | 0xFFFF0000 )
4878 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4879 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for
4880 the 1-byte alignment case, it is always a zero value, since MASK(1)
4881 is zero. All as expected.
4883 On a 64-bit machine, it's more complex, since we're testing
4884 simultaneously for misalignment and for the address being at or
4887 N_PRIMARY_BITS == 20, so
4888 N_PRIMARY_MAP == 0x100000, so
4889 N_PRIMARY_MAP-1 == 0xFFFFF, so
4890 (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4892 MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4893 = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4895 = 0xFFFF'FFF0'0000'0000
4897 MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4898 = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4900 = 0xFFFF'FFF0'0000'0001
4902 MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4903 = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4905 = 0xFFFF'FFF0'0000'0003
4907 MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4908 = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4910 = 0xFFFF'FFF0'0000'0007
4913 /*------------------------------------------------------------*/
4914 /*--- LOADV256 and LOADV128 ---*/
4915 /*------------------------------------------------------------*/
4918 void mc_LOADV_128_or_256 ( /*OUT*/ULong
* res
,
4919 Addr a
, SizeT nBits
, Bool isBigEndian
)
4921 PROF_EVENT(MCPE_LOADV_128_OR_256
);
4923 #ifndef PERF_FAST_LOADV
4924 mc_LOADV_128_or_256_slow( res
, a
, nBits
, isBigEndian
);
4928 UWord sm_off16
, vabits16
, j
;
4929 UWord nBytes
= nBits
/ 8;
4930 UWord nULongs
= nBytes
/ 8;
4933 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,nBits
) )) {
4934 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1
);
4935 mc_LOADV_128_or_256_slow( res
, a
, nBits
, isBigEndian
);
4939 /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4940 suitably aligned, is mapped, and addressable. */
4941 for (j
= 0; j
< nULongs
; j
++) {
4942 sm
= get_secmap_for_reading_low(a
+ 8*j
);
4943 sm_off16
= SM_OFF_16(a
+ 8*j
);
4944 vabits16
= sm
->vabits16
[sm_off16
];
4946 // Convert V bits from compact memory form to expanded
4948 if (LIKELY(vabits16
== VA_BITS16_DEFINED
)) {
4949 res
[j
] = V_BITS64_DEFINED
;
4950 } else if (LIKELY(vabits16
== VA_BITS16_UNDEFINED
)) {
4951 res
[j
] = V_BITS64_UNDEFINED
;
4953 /* Slow case: some block of 8 bytes are not all-defined or
4955 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2
);
4956 mc_LOADV_128_or_256_slow( res
, a
, nBits
, isBigEndian
);
/* 256-bit big-endian load helper: hands the start of res->w64, the
   address a, nBits = 256 and isBigEndian = True to mc_LOADV_128_or_256,
   which writes the resulting V bits through res. */
4965 VG_REGPARM(2) void MC_(helperc_LOADV256be
) ( /*OUT*/V256
* res
, Addr a
)
4967 mc_LOADV_128_or_256(&res
->w64
[0], a
, 256, True
);
/* 256-bit little-endian load helper: same as the big-endian variant but
   passes isBigEndian = False to mc_LOADV_128_or_256. */
4969 VG_REGPARM(2) void MC_(helperc_LOADV256le
) ( /*OUT*/V256
* res
, Addr a
)
4971 mc_LOADV_128_or_256(&res
->w64
[0], a
, 256, False
);
/* 128-bit big-endian load helper: hands the start of res->w64, the
   address a, nBits = 128 and isBigEndian = True to mc_LOADV_128_or_256,
   which writes the resulting V bits through res. */
4974 VG_REGPARM(2) void MC_(helperc_LOADV128be
) ( /*OUT*/V128
* res
, Addr a
)
4976 mc_LOADV_128_or_256(&res
->w64
[0], a
, 128, True
);
/* 128-bit little-endian load helper: same as the big-endian variant but
   passes isBigEndian = False to mc_LOADV_128_or_256. */
4978 VG_REGPARM(2) void MC_(helperc_LOADV128le
) ( /*OUT*/V128
* res
, Addr a
)
4980 mc_LOADV_128_or_256(&res
->w64
[0], a
, 128, False
);
4983 /*------------------------------------------------------------*/
4985 /*------------------------------------------------------------*/
4988 ULong
mc_LOADV64 ( Addr a
, Bool isBigEndian
)
4990 PROF_EVENT(MCPE_LOADV64
);
4992 #ifndef PERF_FAST_LOADV
4993 return mc_LOADVn_slow( a
, 64, isBigEndian
);
4996 UWord sm_off16
, vabits16
;
4999 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,64) )) {
5000 PROF_EVENT(MCPE_LOADV64_SLOW1
);
5001 return (ULong
)mc_LOADVn_slow( a
, 64, isBigEndian
);
5004 sm
= get_secmap_for_reading_low(a
);
5005 sm_off16
= SM_OFF_16(a
);
5006 vabits16
= sm
->vabits16
[sm_off16
];
5008 // Handle common case quickly: a is suitably aligned, is mapped, and
5010 // Convert V bits from compact memory form to expanded register form.
5011 if (LIKELY(vabits16
== VA_BITS16_DEFINED
)) {
5012 return V_BITS64_DEFINED
;
5013 } else if (LIKELY(vabits16
== VA_BITS16_UNDEFINED
)) {
5014 return V_BITS64_UNDEFINED
;
5016 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
5017 PROF_EVENT(MCPE_LOADV64_SLOW2
);
5018 return mc_LOADVn_slow( a
, 64, isBigEndian
);
5024 // Generic for all platforms
/* 64-bit big-endian load helper: returns the V bits that
   mc_LOADV64(a, True) computes for the 8 bytes at address a. */
5025 VG_REGPARM(1) ULong
MC_(helperc_LOADV64be
) ( Addr a
)
5027 return mc_LOADV64(a
, True
);
5030 // Non-generic assembly for arm32-linux
5031 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5032 && defined(VGP_arm_linux)
5033 /* See mc_main_asm.c */
5035 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5036 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris) || defined(VGP_x86_freebsd))
5037 /* See mc_main_asm.c */
5040 // Generic for all platforms except {arm32,x86}-linux, x86-solaris and x86-freebsd
/* 64-bit little-endian load helper (C version): returns the V bits that
   mc_LOADV64(a, False) computes for the 8 bytes at address a. */
5041 VG_REGPARM(1) ULong
MC_(helperc_LOADV64le
) ( Addr a
)
5043 return mc_LOADV64(a
, False
);
5047 /*------------------------------------------------------------*/
5048 /*--- STOREV64 ---*/
5049 /*------------------------------------------------------------*/
5052 void mc_STOREV64 ( Addr a
, ULong vbits64
, Bool isBigEndian
)
5054 PROF_EVENT(MCPE_STOREV64
);
5056 #ifndef PERF_FAST_STOREV
5057 // XXX: this slow case seems to be marginally faster than the fast case!
5058 // Investigate further.
5059 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
5062 UWord sm_off16
, vabits16
;
5065 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,64) )) {
5066 PROF_EVENT(MCPE_STOREV64_SLOW1
);
5067 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
5071 sm
= get_secmap_for_reading_low(a
);
5072 sm_off16
= SM_OFF_16(a
);
5073 vabits16
= sm
->vabits16
[sm_off16
];
5075 // To understand the below cleverness, see the extensive comments
5076 // in MC_(helperc_STOREV8).
5077 if (LIKELY(V_BITS64_DEFINED
== vbits64
)) {
5078 if (LIKELY(vabits16
== (UShort
)VA_BITS16_DEFINED
)) {
5081 if (!is_distinguished_sm(sm
) && VA_BITS16_UNDEFINED
== vabits16
) {
5082 sm
->vabits16
[sm_off16
] = VA_BITS16_DEFINED
;
5085 PROF_EVENT(MCPE_STOREV64_SLOW2
);
5086 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
5089 if (V_BITS64_UNDEFINED
== vbits64
) {
5090 if (vabits16
== (UShort
)VA_BITS16_UNDEFINED
) {
5093 if (!is_distinguished_sm(sm
) && VA_BITS16_DEFINED
== vabits16
) {
5094 sm
->vabits16
[sm_off16
] = VA_BITS16_UNDEFINED
;
5097 PROF_EVENT(MCPE_STOREV64_SLOW3
);
5098 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
5102 PROF_EVENT(MCPE_STOREV64_SLOW4
);
5103 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
/* 64-bit big-endian store helper: forwards address a and the V bits
   vbits64 to mc_STOREV64 with isBigEndian = True. */
5108 VG_REGPARM(1) void MC_(helperc_STOREV64be
) ( Addr a
, ULong vbits64
)
5110 mc_STOREV64(a
, vbits64
, True
);
/* 64-bit little-endian store helper: forwards address a and the V bits
   vbits64 to mc_STOREV64 with isBigEndian = False. */
5112 VG_REGPARM(1) void MC_(helperc_STOREV64le
) ( Addr a
, ULong vbits64
)
5114 mc_STOREV64(a
, vbits64
, False
);
5117 /*------------------------------------------------------------*/
5119 /*------------------------------------------------------------*/
5122 UWord
mc_LOADV32 ( Addr a
, Bool isBigEndian
)
5124 PROF_EVENT(MCPE_LOADV32
);
5126 #ifndef PERF_FAST_LOADV
5127 return (UWord
)mc_LOADVn_slow( a
, 32, isBigEndian
);
5130 UWord sm_off
, vabits8
;
5133 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,32) )) {
5134 PROF_EVENT(MCPE_LOADV32_SLOW1
);
5135 return (UWord
)mc_LOADVn_slow( a
, 32, isBigEndian
);
5138 sm
= get_secmap_for_reading_low(a
);
5140 vabits8
= sm
->vabits8
[sm_off
];
5142 // Handle common case quickly: a is suitably aligned, is mapped, and the
5143 // entire word32 it lives in is addressable.
5144 // Convert V bits from compact memory form to expanded register form.
5145 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
5146 // Almost certainly not necessary, but be paranoid.
5147 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) {
5148 return ((UWord
)0xFFFFFFFF00000000ULL
| (UWord
)V_BITS32_DEFINED
);
5149 } else if (LIKELY(vabits8
== VA_BITS8_UNDEFINED
)) {
5150 return ((UWord
)0xFFFFFFFF00000000ULL
| (UWord
)V_BITS32_UNDEFINED
);
5152 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
5153 PROF_EVENT(MCPE_LOADV32_SLOW2
);
5154 return (UWord
)mc_LOADVn_slow( a
, 32, isBigEndian
);
5160 // Generic for all platforms
/* 32-bit big-endian load helper: returns the V bits that
   mc_LOADV32(a, True) computes for the 4 bytes at address a. */
5161 VG_REGPARM(1) UWord
MC_(helperc_LOADV32be
) ( Addr a
)
5163 return mc_LOADV32(a
, True
);
5166 // Non-generic assembly for arm32-linux
5167 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5168 && defined(VGP_arm_linux)
5169 /* See mc_main_asm.c */
5171 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5172 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5173 /* See mc_main_asm.c */
5176 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
/* 32-bit little-endian load helper (C version): returns the V bits that
   mc_LOADV32(a, False) computes for the 4 bytes at address a. */
5177 VG_REGPARM(1) UWord
MC_(helperc_LOADV32le
) ( Addr a
)
5179 return mc_LOADV32(a
, False
);
5183 /*------------------------------------------------------------*/
5184 /*--- STOREV32 ---*/
5185 /*------------------------------------------------------------*/
5188 void mc_STOREV32 ( Addr a
, UWord vbits32
, Bool isBigEndian
)
5190 PROF_EVENT(MCPE_STOREV32
);
5192 #ifndef PERF_FAST_STOREV
5193 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5196 UWord sm_off
, vabits8
;
5199 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,32) )) {
5200 PROF_EVENT(MCPE_STOREV32_SLOW1
);
5201 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5205 sm
= get_secmap_for_reading_low(a
);
5207 vabits8
= sm
->vabits8
[sm_off
];
5209 // To understand the below cleverness, see the extensive comments
5210 // in MC_(helperc_STOREV8).
5211 if (LIKELY(V_BITS32_DEFINED
== vbits32
)) {
5212 if (LIKELY(vabits8
== (UInt
)VA_BITS8_DEFINED
)) {
5215 if (!is_distinguished_sm(sm
) && VA_BITS8_UNDEFINED
== vabits8
) {
5216 sm
->vabits8
[sm_off
] = (UInt
)VA_BITS8_DEFINED
;
5219 PROF_EVENT(MCPE_STOREV32_SLOW2
);
5220 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5223 if (V_BITS32_UNDEFINED
== vbits32
) {
5224 if (vabits8
== (UInt
)VA_BITS8_UNDEFINED
) {
5227 if (!is_distinguished_sm(sm
) && VA_BITS8_DEFINED
== vabits8
) {
5228 sm
->vabits8
[sm_off
] = (UInt
)VA_BITS8_UNDEFINED
;
5231 PROF_EVENT(MCPE_STOREV32_SLOW3
);
5232 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5236 PROF_EVENT(MCPE_STOREV32_SLOW4
);
5237 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
/* 32-bit big-endian store helper: forwards address a and the V bits
   vbits32 to mc_STOREV32 with isBigEndian = True. */
5242 VG_REGPARM(2) void MC_(helperc_STOREV32be
) ( Addr a
, UWord vbits32
)
5244 mc_STOREV32(a
, vbits32
, True
);
/* 32-bit little-endian store helper: forwards address a and the V bits
   vbits32 to mc_STOREV32 with isBigEndian = False. */
5246 VG_REGPARM(2) void MC_(helperc_STOREV32le
) ( Addr a
, UWord vbits32
)
5248 mc_STOREV32(a
, vbits32
, False
);
5251 /*------------------------------------------------------------*/
5253 /*------------------------------------------------------------*/
5256 UWord
mc_LOADV16 ( Addr a
, Bool isBigEndian
)
5258 PROF_EVENT(MCPE_LOADV16
);
5260 #ifndef PERF_FAST_LOADV
5261 return (UWord
)mc_LOADVn_slow( a
, 16, isBigEndian
);
5264 UWord sm_off
, vabits8
;
5267 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,16) )) {
5268 PROF_EVENT(MCPE_LOADV16_SLOW1
);
5269 return (UWord
)mc_LOADVn_slow( a
, 16, isBigEndian
);
5272 sm
= get_secmap_for_reading_low(a
);
5274 vabits8
= sm
->vabits8
[sm_off
];
5275 // Handle common case quickly: a is suitably aligned, is mapped, and is
5277 // Convert V bits from compact memory form to expanded register form
5278 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) { return V_BITS16_DEFINED
; }
5279 else if (LIKELY(vabits8
== VA_BITS8_UNDEFINED
)) { return V_BITS16_UNDEFINED
; }
5281 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5282 // the two sub-bytes.
5283 UChar vabits4
= extract_vabits4_from_vabits8(a
, vabits8
);
5284 if (vabits4
== VA_BITS4_DEFINED
) { return V_BITS16_DEFINED
; }
5285 else if (vabits4
== VA_BITS4_UNDEFINED
) { return V_BITS16_UNDEFINED
; }
5287 /* Slow case: the two bytes are not all-defined or all-undefined. */
5288 PROF_EVENT(MCPE_LOADV16_SLOW2
);
5289 return (UWord
)mc_LOADVn_slow( a
, 16, isBigEndian
);
5296 // Generic for all platforms
/* 16-bit big-endian load helper: returns the V bits that
   mc_LOADV16(a, True) computes for the 2 bytes at address a. */
5297 VG_REGPARM(1) UWord
MC_(helperc_LOADV16be
) ( Addr a
)
5299 return mc_LOADV16(a
, True
);
5302 // Non-generic assembly for arm32-linux
5303 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5304 && defined(VGP_arm_linux)
5305 __asm__( /* Derived from NCode template */
5308 ".global vgMemCheck_helperc_LOADV16le \n"
5309 ".type vgMemCheck_helperc_LOADV16le, %function \n"
5310 "vgMemCheck_helperc_LOADV16le: \n" //
5312 " bne .LLV16LEc12 \n" // if misaligned
5313 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5314 " movw r3, #:lower16:primary_map \n" //
5315 " uxth r1, r0 \n" // r1 = sec-map-offB
5316 " movt r3, #:upper16:primary_map \n" //
5317 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5318 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5319 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5320 " bne .LLV16LEc0 \n" // no, goto .LLV16LEc0
5322 " mov r0, #0xFFFFFFFF \n" //
5323 " lsl r0, r0, #16 \n" // V_BITS16_DEFINED | top16safe
5326 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5327 " bne .LLV16LEc4 \n" //
5329 " mov r0, #0xFFFFFFFF \n" // V_BITS16_UNDEFINED | top16safe
5332 // r1 holds sec-map-VABITS8. r0 holds the address and is 2-aligned.
5333 // Extract the relevant 4 bits and inspect.
5334 " and r2, r0, #2 \n" // addr & 2
5335 " add r2, r2, r2 \n" // 2 * (addr & 2)
5336 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 2))
5337 " and r1, r1, #15 \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
5339 " cmp r1, #0xA \n" // VA_BITS4_DEFINED
5340 " beq .LLV16LEh9 \n" //
5342 " cmp r1, #0x5 \n" // VA_BITS4_UNDEFINED
5343 " beq .LLV16LEc2 \n" //
5345 ".LLV16LEc12: \n" //
5346 " push {r4, lr} \n" //
5348 " mov r1, #16 \n" //
5349 " bl mc_LOADVn_slow \n" //
5350 " pop {r4, pc} \n" //
5351 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5355 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5356 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5360 ".global vgMemCheck_helperc_LOADV16le\n"
5361 ".type vgMemCheck_helperc_LOADV16le, @function\n"
5362 "vgMemCheck_helperc_LOADV16le:\n"
5363 " test $0x1, %eax\n"
5364 " jne .LLV16LE5\n" /* jump if not aligned */
5366 " shr $0x10, %edx\n"
5367 " mov primary_map(,%edx,4), %ecx\n"
5368 " movzwl %ax, %edx\n"
5370 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5371 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5372 " jne .LLV16LE2\n" /* jump if not all 32bits defined */
5374 " mov $0xffff0000,%eax\n" /* V_BITS16_DEFINED | top16safe */
5377 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5378 " jne .LLV16LE4\n" /* jump if not all 32bits undefined */
5380 " or $0xffffffff,%eax\n" /* V_BITS16_UNDEFINED | top16safe */
5389 " je .LLV16LE1\n" /* jump if all 16bits are defined */
5391 " je .LLV16LE3\n" /* jump if all 16bits are undefined */
5393 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 16, 0) */
5395 " jmp mc_LOADVn_slow\n"
5396 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5401 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
/* 16-bit little-endian load helper (C version): returns the V bits that
   mc_LOADV16(a, False) computes for the 2 bytes at address a. */
5402 VG_REGPARM(1) UWord
MC_(helperc_LOADV16le
) ( Addr a
)
5404 return mc_LOADV16(a
, False
);
5408 /*------------------------------------------------------------*/
5409 /*--- STOREV16 ---*/
5410 /*------------------------------------------------------------*/
5412 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
/* a:       address of the 2-byte access; asserted to be 2-aligned.
   vabits8: the compact VA byte covering the 4-byte group containing a.
   Bit 1 of a selects the low or high nibble of vabits8 (shift of 0 or
   4).  The result is True only if neither 2-bit field of that nibble
   equals VA_BITS2_NOACCESS. */
5414 Bool
accessible_vabits4_in_vabits8 ( Addr a
, UChar vabits8
)
5417 tl_assert(VG_IS_2_ALIGNED(a
)); // Must be 2-aligned
5418 shift
= (a
& 2) << 1; // shift by 0 or 4
5419 vabits8
>>= shift
; // shift the four bits to the bottom
5420 // check 2 x vabits2 != VA_BITS2_NOACCESS
5421 return ((0x3 & vabits8
) != VA_BITS2_NOACCESS
)
5422 && ((0xc & vabits8
) != VA_BITS2_NOACCESS
<< 2);
5426 void mc_STOREV16 ( Addr a
, UWord vbits16
, Bool isBigEndian
)
5428 PROF_EVENT(MCPE_STOREV16
);
5430 #ifndef PERF_FAST_STOREV
5431 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5434 UWord sm_off
, vabits8
;
5437 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,16) )) {
5438 PROF_EVENT(MCPE_STOREV16_SLOW1
);
5439 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5443 sm
= get_secmap_for_reading_low(a
);
5445 vabits8
= sm
->vabits8
[sm_off
];
5447 // To understand the below cleverness, see the extensive comments
5448 // in MC_(helperc_STOREV8).
5449 if (LIKELY(V_BITS16_DEFINED
== vbits16
)) {
5450 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) {
5453 if (!is_distinguished_sm(sm
)
5454 && accessible_vabits4_in_vabits8(a
, vabits8
)) {
5455 insert_vabits4_into_vabits8( a
, VA_BITS4_DEFINED
,
5456 &(sm
->vabits8
[sm_off
]) );
5459 PROF_EVENT(MCPE_STOREV16_SLOW2
);
5460 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5462 if (V_BITS16_UNDEFINED
== vbits16
) {
5463 if (vabits8
== VA_BITS8_UNDEFINED
) {
5466 if (!is_distinguished_sm(sm
)
5467 && accessible_vabits4_in_vabits8(a
, vabits8
)) {
5468 insert_vabits4_into_vabits8( a
, VA_BITS4_UNDEFINED
,
5469 &(sm
->vabits8
[sm_off
]) );
5472 PROF_EVENT(MCPE_STOREV16_SLOW3
);
5473 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5477 PROF_EVENT(MCPE_STOREV16_SLOW4
);
5478 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
/* 16-bit big-endian store helper: forwards address a and the V bits
   vbits16 to mc_STOREV16 with isBigEndian = True. */
5484 VG_REGPARM(2) void MC_(helperc_STOREV16be
) ( Addr a
, UWord vbits16
)
5486 mc_STOREV16(a
, vbits16
, True
);
/* 16-bit little-endian store helper: forwards address a and the V bits
   vbits16 to mc_STOREV16 with isBigEndian = False. */
5488 VG_REGPARM(2) void MC_(helperc_STOREV16le
) ( Addr a
, UWord vbits16
)
5490 mc_STOREV16(a
, vbits16
, False
);
5493 /*------------------------------------------------------------*/
5495 /*------------------------------------------------------------*/
5497 /* Note: endianness is irrelevant for size == 1 */
5499 // Non-generic assembly for arm32-linux
5500 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5501 && defined(VGP_arm_linux)
5502 __asm__( /* Derived from NCode template */
5505 ".global vgMemCheck_helperc_LOADV8 \n"
5506 ".type vgMemCheck_helperc_LOADV8, %function \n"
5507 "vgMemCheck_helperc_LOADV8: \n" //
5508 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5509 " movw r3, #:lower16:primary_map \n" //
5510 " uxth r1, r0 \n" // r1 = sec-map-offB
5511 " movt r3, #:upper16:primary_map \n" //
5512 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5513 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5514 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5515 " bne .LLV8c0 \n" // no, goto .LLV8c0
5517 " mov r0, #0xFFFFFF00 \n" // V_BITS8_DEFINED | top24safe
5520 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5521 " bne .LLV8c4 \n" //
5523 " mov r0, #0xFFFFFFFF \n" // V_BITS8_UNDEFINED | top24safe
5526 // r1 holds sec-map-VABITS8
5527 // r0 holds the address. Extract the relevant 2 bits and inspect.
5528 " and r2, r0, #3 \n" // addr & 3
5529 " add r2, r2, r2 \n" // 2 * (addr & 3)
5530 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5531 " and r1, r1, #3 \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5533 " cmp r1, #2 \n" // VA_BITS2_DEFINED
5534 " beq .LLV8h9 \n" //
5536 " cmp r1, #1 \n" // VA_BITS2_UNDEFINED
5537 " beq .LLV8c2 \n" //
5539 " push {r4, lr} \n" //
5542 " bl mc_LOADVn_slow \n" //
5543 " pop {r4, pc} \n" //
5544 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5548 /* Non-generic assembly for x86-linux and x86-solaris */
5549 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5550 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5554 ".global vgMemCheck_helperc_LOADV8\n"
5555 ".type vgMemCheck_helperc_LOADV8, @function\n"
5556 "vgMemCheck_helperc_LOADV8:\n"
5558 " shr $0x10, %edx\n"
5559 " mov primary_map(,%edx,4), %ecx\n"
5560 " movzwl %ax, %edx\n"
5562 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5563 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED? */
5564 " jne .LLV8LE2\n" /* jump if not defined */
5566 " mov $0xffffff00, %eax\n" /* V_BITS8_DEFINED | top24safe */
5569 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5570 " jne .LLV8LE4\n" /* jump if not all 32bits are undefined */
5572 " or $0xffffffff, %eax\n" /* V_BITS8_UNDEFINED | top24safe */
5581 " je .LLV8LE1\n" /* jump if all 8bits are defined */
5583 " je .LLV8LE3\n" /* jump if all 8bits are undefined */
5584 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 8, 0) */
5586 " jmp mc_LOADVn_slow\n"
5587 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5592 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5594 UWord
MC_(helperc_LOADV8
) ( Addr a
)
5596 PROF_EVENT(MCPE_LOADV8
);
5598 #ifndef PERF_FAST_LOADV
5599 return (UWord
)mc_LOADVn_slow( a
, 8, False
/*irrelevant*/ );
5602 UWord sm_off
, vabits8
;
5605 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,8) )) {
5606 PROF_EVENT(MCPE_LOADV8_SLOW1
);
5607 return (UWord
)mc_LOADVn_slow( a
, 8, False
/*irrelevant*/ );
5610 sm
= get_secmap_for_reading_low(a
);
5612 vabits8
= sm
->vabits8
[sm_off
];
5613 // Convert V bits from compact memory form to expanded register form
5614 // Handle common case quickly: a is mapped, and the entire
5615 // word32 it lives in is addressible.
5616 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) { return V_BITS8_DEFINED
; }
5617 else if (LIKELY(vabits8
== VA_BITS8_UNDEFINED
)) { return V_BITS8_UNDEFINED
; }
5619 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5621 UChar vabits2
= extract_vabits2_from_vabits8(a
, vabits8
);
5622 if (vabits2
== VA_BITS2_DEFINED
) { return V_BITS8_DEFINED
; }
5623 else if (vabits2
== VA_BITS2_UNDEFINED
) { return V_BITS8_UNDEFINED
; }
5625 /* Slow case: the byte is not all-defined or all-undefined. */
5626 PROF_EVENT(MCPE_LOADV8_SLOW2
);
5627 return (UWord
)mc_LOADVn_slow( a
, 8, False
/*irrelevant*/ );
5635 /*------------------------------------------------------------*/
5637 /*------------------------------------------------------------*/
5640 void MC_(helperc_STOREV8
) ( Addr a
, UWord vbits8
)
5642 PROF_EVENT(MCPE_STOREV8
);
5644 #ifndef PERF_FAST_STOREV
5645 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5648 UWord sm_off
, vabits8
;
5651 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,8) )) {
5652 PROF_EVENT(MCPE_STOREV8_SLOW1
);
5653 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5657 sm
= get_secmap_for_reading_low(a
);
5659 vabits8
= sm
->vabits8
[sm_off
];
5661 // Clevernesses to speed up storing V bits.
5662 // The 64/32/16 bit cases also have similar clevernesses, but it
5663 // works a little differently to the code below.
5665 // Cleverness 1: sometimes we don't have to write the shadow memory at
5666 // all, if we can tell that what we want to write is the same as what is
5667 // already there. These cases are marked below as "defined on defined" and
5668 // "undefined on undefined".
5671 // We also avoid to call mc_STOREVn_slow if the V bits can directly
5672 // be written in the secondary map. V bits can be directly written
5673 // if 4 conditions are respected:
5674 // * The address for which V bits are written is naturally aligned
5675 // on 1 byte for STOREV8 (this is always true)
5676 // on 2 bytes for STOREV16
5677 // on 4 bytes for STOREV32
5678 // on 8 bytes for STOREV64.
5679 // * V bits being written are either fully defined or fully undefined.
5680 // (for partially defined V bits, V bits cannot be directly written,
5681 // as the secondary vbits table must be maintained).
5682 // * the secmap is not distinguished (distinguished maps cannot be
5684 // * the memory corresponding to the V bits being written is
5685 // accessible (if one or more bytes are not accessible,
5686 // we must call mc_STOREVn_slow in order to report accessibility
5688 // Note that for STOREV32 and STOREV64, it is too expensive
5689 // to verify the accessibility of each byte for the benefit it
5690 // brings. Instead, a quicker check is done by comparing to
5691 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
5692 // but misses some opportunity of direct modifications.
5693 // Checking each byte accessibility was measured for
5694 // STOREV32+perf tests and was slowing down all perf tests.
5695 // The cases corresponding to cleverness 2 are marked below as
5697 if (LIKELY(V_BITS8_DEFINED
== vbits8
)) {
5698 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) {
5699 return; // defined on defined
5701 if (!is_distinguished_sm(sm
)
5702 && VA_BITS2_NOACCESS
!= extract_vabits2_from_vabits8(a
, vabits8
)) {
5704 insert_vabits2_into_vabits8( a
, VA_BITS2_DEFINED
,
5705 &(sm
->vabits8
[sm_off
]) );
5708 PROF_EVENT(MCPE_STOREV8_SLOW2
);
5709 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5712 if (V_BITS8_UNDEFINED
== vbits8
) {
5713 if (vabits8
== VA_BITS8_UNDEFINED
) {
5714 return; // undefined on undefined
5716 if (!is_distinguished_sm(sm
)
5717 && (VA_BITS2_NOACCESS
5718 != extract_vabits2_from_vabits8(a
, vabits8
))) {
5720 insert_vabits2_into_vabits8( a
, VA_BITS2_UNDEFINED
,
5721 &(sm
->vabits8
[sm_off
]) );
5724 PROF_EVENT(MCPE_STOREV8_SLOW3
);
5725 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5729 // Partially defined word
5730 PROF_EVENT(MCPE_STOREV8_SLOW4
);
5731 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5737 /*------------------------------------------------------------*/
5738 /*--- Functions called directly from generated code: ---*/
5739 /*--- Value-check failure handlers. ---*/
5740 /*------------------------------------------------------------*/
5742 /* Call these ones when an origin is available ... */
5744 void MC_(helperc_value_check0_fail_w_o
) ( UWord origin
) {
5745 MC_(record_cond_error
) ( VG_(get_running_tid
)(), (UInt
)origin
);
5749 void MC_(helperc_value_check1_fail_w_o
) ( UWord origin
) {
5750 MC_(record_value_error
) ( VG_(get_running_tid
)(), 1, (UInt
)origin
);
5754 void MC_(helperc_value_check4_fail_w_o
) ( UWord origin
) {
5755 MC_(record_value_error
) ( VG_(get_running_tid
)(), 4, (UInt
)origin
);
5759 void MC_(helperc_value_check8_fail_w_o
) ( UWord origin
) {
5760 MC_(record_value_error
) ( VG_(get_running_tid
)(), 8, (UInt
)origin
);
5764 void MC_(helperc_value_checkN_fail_w_o
) ( HWord sz
, UWord origin
) {
5765 MC_(record_value_error
) ( VG_(get_running_tid
)(), (Int
)sz
, (UInt
)origin
);
5768 /* ... and these when an origin isn't available. */
5771 void MC_(helperc_value_check0_fail_no_o
) ( void ) {
5772 MC_(record_cond_error
) ( VG_(get_running_tid
)(), 0/*origin*/ );
5776 void MC_(helperc_value_check1_fail_no_o
) ( void ) {
5777 MC_(record_value_error
) ( VG_(get_running_tid
)(), 1, 0/*origin*/ );
5781 void MC_(helperc_value_check4_fail_no_o
) ( void ) {
5782 MC_(record_value_error
) ( VG_(get_running_tid
)(), 4, 0/*origin*/ );
5786 void MC_(helperc_value_check8_fail_no_o
) ( void ) {
5787 MC_(record_value_error
) ( VG_(get_running_tid
)(), 8, 0/*origin*/ );
5791 void MC_(helperc_value_checkN_fail_no_o
) ( HWord sz
) {
5792 MC_(record_value_error
) ( VG_(get_running_tid
)(), (Int
)sz
, 0/*origin*/ );
5796 /*------------------------------------------------------------*/
5797 /*--- Metadata get/set functions, for client requests. ---*/
5798 /*------------------------------------------------------------*/
5800 // Nb: this expands the V+A bits out into register-form V bits, even though
5801 // they're in memory. This is for backward compatibility, and because it's
5802 // probably what the user wants.
5804 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5805 error [no longer used], 3 == addressing error. */
5806 /* Nb: We used to issue various definedness/addressability errors from here,
5807 but we took them out because they ranged from not-very-helpful to
5808 downright annoying, and they complicated the error data structures. */
5809 static Int
mc_get_or_set_vbits_for_client (
5813 Bool setting
, /* True <=> set vbits, False <=> get vbits */
5814 Bool is_client_request
/* True <=> real user request
5815 False <=> internal call from gdbserver */
5822 /* Check that arrays are addressible before doing any getting/setting.
5823 vbits to be checked only for real user request. */
5824 for (i
= 0; i
< szB
; i
++) {
5825 if (VA_BITS2_NOACCESS
== get_vabits2(a
+ i
) ||
5826 (is_client_request
&& VA_BITS2_NOACCESS
== get_vabits2(vbits
+ i
))) {
5834 for (i
= 0; i
< szB
; i
++) {
5835 ok
= set_vbits8(a
+ i
, ((UChar
*)vbits
)[i
]);
5840 for (i
= 0; i
< szB
; i
++) {
5841 ok
= get_vbits8(a
+ i
, &vbits8
);
5843 ((UChar
*)vbits
)[i
] = vbits8
;
5845 if (is_client_request
)
5846 // The bytes in vbits[] have now been set, so mark them as such.
5847 MC_(make_mem_defined
)(vbits
, szB
);
5854 /*------------------------------------------------------------*/
5855 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
5856 /*------------------------------------------------------------*/
5858 /* For the memory leak detector, say whether an entire 64k chunk of
5859 address space is possibly in use, or not. If in doubt return
5862 Bool
MC_(is_within_valid_secondary
) ( Addr a
)
5864 SecMap
* sm
= maybe_get_secmap_for ( a
);
5865 if (sm
== NULL
|| sm
== &sm_distinguished
[SM_DIST_NOACCESS
]) {
5866 /* Definitely not in use. */
5874 /* For the memory leak detector, say whether or not a given word
5875 address is to be regarded as valid. */
5876 Bool
MC_(is_valid_aligned_word
) ( Addr a
)
5878 tl_assert(sizeof(UWord
) == 4 || sizeof(UWord
) == 8);
5879 tl_assert(VG_IS_WORD_ALIGNED(a
));
5880 if (get_vabits8_for_aligned_word32 (a
) != VA_BITS8_DEFINED
)
5882 if (sizeof(UWord
) == 8) {
5883 if (get_vabits8_for_aligned_word32 (a
+ 4) != VA_BITS8_DEFINED
)
5886 if (UNLIKELY(MC_(in_ignored_range
)(a
)))
5893 /*------------------------------------------------------------*/
5894 /*--- Initialisation ---*/
5895 /*------------------------------------------------------------*/
5897 static void init_shadow_memory ( void )
5902 tl_assert(V_BIT_UNDEFINED
== 1);
5903 tl_assert(V_BIT_DEFINED
== 0);
5904 tl_assert(V_BITS8_UNDEFINED
== 0xFF);
5905 tl_assert(V_BITS8_DEFINED
== 0);
5907 /* Build the 3 distinguished secondaries */
5908 sm
= &sm_distinguished
[SM_DIST_NOACCESS
];
5909 for (i
= 0; i
< SM_CHUNKS
; i
++) sm
->vabits8
[i
] = VA_BITS8_NOACCESS
;
5911 sm
= &sm_distinguished
[SM_DIST_UNDEFINED
];
5912 for (i
= 0; i
< SM_CHUNKS
; i
++) sm
->vabits8
[i
] = VA_BITS8_UNDEFINED
;
5914 sm
= &sm_distinguished
[SM_DIST_DEFINED
];
5915 for (i
= 0; i
< SM_CHUNKS
; i
++) sm
->vabits8
[i
] = VA_BITS8_DEFINED
;
5917 /* Set up the primary map. */
5918 /* These entries gradually get overwritten as the used address
5920 for (i
= 0; i
< N_PRIMARY_MAP
; i
++)
5921 primary_map
[i
] = &sm_distinguished
[SM_DIST_NOACCESS
];
5923 /* Auxiliary primary maps */
5924 init_auxmap_L1_L2();
5926 /* auxmap_size = auxmap_used = 0;
5927 no ... these are statically initialised */
5929 /* Secondary V bit table */
5930 secVBitTable
= createSecVBitTable();
5934 /*------------------------------------------------------------*/
5935 /*--- Sanity check machinery (permanently engaged) ---*/
5936 /*------------------------------------------------------------*/
5938 static Bool
mc_cheap_sanity_check ( void )
5941 PROF_EVENT(MCPE_CHEAP_SANITY_CHECK
);
5942 /* Check for sane operating level */
5943 if (MC_(clo_mc_level
) < 1 || MC_(clo_mc_level
) > 3)
5945 /* nothing else useful we can rapidly check */
5949 static Bool
mc_expensive_sanity_check ( void )
5952 Word n_secmaps_found
;
5954 const HChar
* errmsg
;
5957 if (0) VG_(printf
)("expensive sanity check\n");
5960 n_sanity_expensive
++;
5961 PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK
);
5963 /* Check for sane operating level */
5964 if (MC_(clo_mc_level
) < 1 || MC_(clo_mc_level
) > 3)
5967 /* Check that the 3 distinguished SMs are still as they should be. */
5969 /* Check noaccess DSM. */
5970 sm
= &sm_distinguished
[SM_DIST_NOACCESS
];
5971 for (i
= 0; i
< SM_CHUNKS
; i
++)
5972 if (sm
->vabits8
[i
] != VA_BITS8_NOACCESS
)
5975 /* Check undefined DSM. */
5976 sm
= &sm_distinguished
[SM_DIST_UNDEFINED
];
5977 for (i
= 0; i
< SM_CHUNKS
; i
++)
5978 if (sm
->vabits8
[i
] != VA_BITS8_UNDEFINED
)
5981 /* Check defined DSM. */
5982 sm
= &sm_distinguished
[SM_DIST_DEFINED
];
5983 for (i
= 0; i
< SM_CHUNKS
; i
++)
5984 if (sm
->vabits8
[i
] != VA_BITS8_DEFINED
)
5988 VG_(printf
)("memcheck expensive sanity: "
5989 "distinguished_secondaries have changed\n");
5993 /* If we're not checking for undefined value errors, the secondary V bit
5994 * table should be empty. */
5995 if (MC_(clo_mc_level
) == 1) {
5996 if (0 != VG_(OSetGen_Size
)(secVBitTable
))
6000 /* check the auxiliary maps, very thoroughly */
6001 n_secmaps_found
= 0;
6002 errmsg
= check_auxmap_L1_L2_sanity( &n_secmaps_found
);
6004 VG_(printf
)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg
);
6008 /* n_secmaps_found is now the number referred to by the auxiliary
6009 primary map. Now add on the ones referred to by the main
6011 for (i
= 0; i
< N_PRIMARY_MAP
; i
++) {
6012 if (primary_map
[i
] == NULL
) {
6015 if (!is_distinguished_sm(primary_map
[i
]))
6020 /* check that the number of secmaps issued matches the number that
6021 are reachable (iow, no secmap leaks) */
6022 if (n_secmaps_found
!= (n_issued_SMs
- n_deissued_SMs
))
6026 VG_(printf
)("memcheck expensive sanity: "
6027 "apparent secmap leakage\n");
6032 VG_(printf
)("memcheck expensive sanity: "
6033 "auxmap covers wrong address space\n");
6037 /* there is only one pointer to each secmap (expensive) */
6042 /*------------------------------------------------------------*/
6043 /*--- Command line args ---*/
6044 /*------------------------------------------------------------*/
6046 /* 31 Aug 2015: Vectorised code is now so widespread that
6047 --partial-loads-ok needs to be enabled by default on all platforms.
6048 Not doing so causes lots of false errors. */
6049 Bool
MC_(clo_partial_loads_ok
) = True
;
6050 Long
MC_(clo_freelist_vol
) = 20LL*1000LL*1000LL;
6051 Long
MC_(clo_freelist_big_blocks
) = 1LL*1000LL*1000LL;
6052 LeakCheckMode
MC_(clo_leak_check
) = LC_Summary
;
6053 VgRes
MC_(clo_leak_resolution
) = Vg_HighRes
;
6054 UInt
MC_(clo_show_leak_kinds
) = R2S(Possible
) | R2S(Unreached
);
6055 UInt
MC_(clo_error_for_leak_kinds
) = R2S(Possible
) | R2S(Unreached
);
6056 UInt
MC_(clo_leak_check_heuristics
) = H2S(LchStdString
)
6059 | H2S( LchMultipleInheritance
);
6060 Bool
MC_(clo_xtree_leak
) = False
;
6061 const HChar
* MC_(clo_xtree_leak_file
) = "xtleak.kcg.%p";
6062 Bool
MC_(clo_workaround_gcc296_bugs
) = False
;
6063 Int
MC_(clo_malloc_fill
) = -1;
6064 Int
MC_(clo_free_fill
) = -1;
6065 KeepStacktraces
MC_(clo_keep_stacktraces
) = KS_alloc_and_free
;
6066 Int
MC_(clo_mc_level
) = 2;
6067 Bool
MC_(clo_show_mismatched_frees
) = True
;
6068 Bool
MC_(clo_show_realloc_size_zero
) = True
;
6070 ExpensiveDefinednessChecks
6071 MC_(clo_expensive_definedness_checks
) = EdcAUTO
;
6073 Bool
MC_(clo_ignore_range_below_sp
) = False
;
6074 UInt
MC_(clo_ignore_range_below_sp__first_offset
) = 0;
6075 UInt
MC_(clo_ignore_range_below_sp__last_offset
) = 0;
6077 static const HChar
* MC_(parse_leak_heuristics_tokens
) =
6078 "-,stdstring,length64,newarray,multipleinheritance";
6079 /* The first heuristic value (LchNone) has no keyword, as this is
6080 a fake heuristic used to collect the blocks found without any
6083 static Bool
mc_process_cmd_line_options(const HChar
* arg
)
6085 const HChar
* tmp_str
;
6088 tl_assert( MC_(clo_mc_level
) >= 1 && MC_(clo_mc_level
) <= 3 );
6090 /* Set MC_(clo_mc_level):
6091 1 = A bit tracking only
6092 2 = A and V bit tracking, but no V bit origins
6093 3 = A and V bit tracking, and V bit origins
6095 Do this by inspecting --undef-value-errors= and
6096 --track-origins=. Reject the case --undef-value-errors=no
6097 --track-origins=yes as meaningless.
6099 if VG_BOOL_CLO(arg
, "--undef-value-errors", tmp_show
) {
6101 if (MC_(clo_mc_level
) == 1)
6102 MC_(clo_mc_level
) = 2;
6104 if (MC_(clo_mc_level
) == 3) {
6107 MC_(clo_mc_level
) = 1;
6111 else if VG_BOOL_CLO(arg
, "--track-origins", tmp_show
) {
6113 if (MC_(clo_mc_level
) == 1) {
6116 MC_(clo_mc_level
) = 3;
6119 if (MC_(clo_mc_level
) == 3)
6120 MC_(clo_mc_level
) = 2;
6123 else if VG_BOOL_CLO(arg
, "--partial-loads-ok", MC_(clo_partial_loads_ok
)) {}
6124 else if VG_USET_CLOM(cloPD
, arg
, "--errors-for-leak-kinds",
6125 MC_(parse_leak_kinds_tokens
),
6126 MC_(clo_error_for_leak_kinds
)) {}
6127 else if VG_USET_CLOM(cloPD
, arg
, "--show-leak-kinds",
6128 MC_(parse_leak_kinds_tokens
),
6129 MC_(clo_show_leak_kinds
)) {}
6130 else if VG_USET_CLOM(cloPD
, arg
, "--leak-check-heuristics",
6131 MC_(parse_leak_heuristics_tokens
),
6132 MC_(clo_leak_check_heuristics
)) {}
6133 else if (VG_BOOL_CLOM(cloPD
, arg
, "--show-reachable", tmp_show
)) {
6135 MC_(clo_show_leak_kinds
) = MC_(all_Reachedness
)();
6137 MC_(clo_show_leak_kinds
) &= ~R2S(Reachable
);
6140 else if VG_BOOL_CLOM(cloPD
, arg
, "--show-possibly-lost", tmp_show
) {
6142 MC_(clo_show_leak_kinds
) |= R2S(Possible
);
6144 MC_(clo_show_leak_kinds
) &= ~R2S(Possible
);
6147 else if VG_BOOL_CLO(arg
, "--workaround-gcc296-bugs",
6148 MC_(clo_workaround_gcc296_bugs
)) {}
6150 else if VG_BINT_CLOM(cloPD
, arg
, "--freelist-vol", MC_(clo_freelist_vol
),
6151 0, 10*1000*1000*1000LL) {}
6153 else if VG_BINT_CLOM(cloPD
, arg
, "--freelist-big-blocks",
6154 MC_(clo_freelist_big_blocks
),
6155 0, 10*1000*1000*1000LL) {}
6157 else if VG_XACT_CLOM(cloPD
, arg
, "--leak-check=no",
6158 MC_(clo_leak_check
), LC_Off
) {}
6159 else if VG_XACT_CLOM(cloPD
, arg
, "--leak-check=summary",
6160 MC_(clo_leak_check
), LC_Summary
) {}
6161 else if VG_XACT_CLOM(cloPD
, arg
, "--leak-check=yes",
6162 MC_(clo_leak_check
), LC_Full
) {}
6163 else if VG_XACT_CLOM(cloPD
, arg
, "--leak-check=full",
6164 MC_(clo_leak_check
), LC_Full
) {}
6166 else if VG_XACT_CLO(arg
, "--leak-resolution=low",
6167 MC_(clo_leak_resolution
), Vg_LowRes
) {}
6168 else if VG_XACT_CLO(arg
, "--leak-resolution=med",
6169 MC_(clo_leak_resolution
), Vg_MedRes
) {}
6170 else if VG_XACT_CLO(arg
, "--leak-resolution=high",
6171 MC_(clo_leak_resolution
), Vg_HighRes
) {}
6173 else if VG_STR_CLOM(cloPD
, arg
, "--ignore-ranges", tmp_str
) {
6174 Bool ok
= parse_ignore_ranges(tmp_str
);
6176 VG_(message
)(Vg_DebugMsg
,
6177 "ERROR: --ignore-ranges: "
6178 "invalid syntax, or end <= start in range\n");
6181 if (gIgnoredAddressRanges
) {
6183 for (i
= 0; i
< VG_(sizeRangeMap
)(gIgnoredAddressRanges
); i
++) {
6184 UWord val
= IAR_INVALID
;
6185 UWord key_min
= ~(UWord
)0;
6186 UWord key_max
= (UWord
)0;
6187 VG_(indexRangeMap
)( &key_min
, &key_max
, &val
,
6188 gIgnoredAddressRanges
, i
);
6189 tl_assert(key_min
<= key_max
);
6190 UWord limit
= 0x4000000; /* 64M - entirely arbitrary limit */
6191 if (key_max
- key_min
> limit
&& val
== IAR_CommandLine
) {
6192 VG_(message
)(Vg_DebugMsg
,
6193 "ERROR: --ignore-ranges: suspiciously large range:\n");
6194 VG_(message
)(Vg_DebugMsg
,
6195 " 0x%lx-0x%lx (size %lu)\n", key_min
, key_max
,
6196 key_max
- key_min
+ 1);
6203 else if VG_STR_CLOM(cloPD
, arg
, "--ignore-range-below-sp", tmp_str
) {
6204 /* This seems at first a bit weird, but: in order to imply
6205 a non-wrapped-around address range, the first offset needs to be
6206 larger than the second one. For example
6207 --ignore-range-below-sp=8192,8189
6208 would cause accesses to in the range [SP-8192, SP-8189] to be
6210 UInt offs1
= 0, offs2
= 0;
6211 Bool ok
= parse_UInt_pair(&tmp_str
, &offs1
, &offs2
);
6212 // Ensure we used all the text after the '=' sign.
6213 if (ok
&& *tmp_str
!= 0) ok
= False
;
6215 VG_(message
)(Vg_DebugMsg
,
6216 "ERROR: --ignore-range-below-sp: invalid syntax. "
6217 " Expected \"...=decimalnumber-decimalnumber\".\n");
6220 if (offs1
> 1000*1000 /*arbitrary*/ || offs2
> 1000*1000 /*ditto*/) {
6221 VG_(message
)(Vg_DebugMsg
,
6222 "ERROR: --ignore-range-below-sp: suspiciously large "
6223 "offset(s): %u and %u\n", offs1
, offs2
);
6226 if (offs1
<= offs2
) {
6227 VG_(message
)(Vg_DebugMsg
,
6228 "ERROR: --ignore-range-below-sp: invalid offsets "
6229 "(the first must be larger): %u and %u\n", offs1
, offs2
);
6232 tl_assert(offs1
> offs2
);
6233 if (offs1
- offs2
> 4096 /*arbitrary*/) {
6234 VG_(message
)(Vg_DebugMsg
,
6235 "ERROR: --ignore-range-below-sp: suspiciously large "
6236 "range: %u-%u (size %u)\n", offs1
, offs2
, offs1
- offs2
);
6239 MC_(clo_ignore_range_below_sp
) = True
;
6240 MC_(clo_ignore_range_below_sp__first_offset
) = offs1
;
6241 MC_(clo_ignore_range_below_sp__last_offset
) = offs2
;
6245 else if VG_BHEX_CLO(arg
, "--malloc-fill", MC_(clo_malloc_fill
), 0x00,0xFF) {}
6246 else if VG_BHEX_CLO(arg
, "--free-fill", MC_(clo_free_fill
), 0x00,0xFF) {}
6248 else if VG_XACT_CLO(arg
, "--keep-stacktraces=alloc",
6249 MC_(clo_keep_stacktraces
), KS_alloc
) {}
6250 else if VG_XACT_CLO(arg
, "--keep-stacktraces=free",
6251 MC_(clo_keep_stacktraces
), KS_free
) {}
6252 else if VG_XACT_CLO(arg
, "--keep-stacktraces=alloc-and-free",
6253 MC_(clo_keep_stacktraces
), KS_alloc_and_free
) {}
6254 else if VG_XACT_CLO(arg
, "--keep-stacktraces=alloc-then-free",
6255 MC_(clo_keep_stacktraces
), KS_alloc_then_free
) {}
6256 else if VG_XACT_CLO(arg
, "--keep-stacktraces=none",
6257 MC_(clo_keep_stacktraces
), KS_none
) {}
6259 else if VG_BOOL_CLOM(cloPD
, arg
, "--show-mismatched-frees",
6260 MC_(clo_show_mismatched_frees
)) {}
6261 else if VG_BOOL_CLOM(cloPD
, arg
, "--show-realloc-size-zero",
6262 MC_(clo_show_realloc_size_zero
)) {}
6264 else if VG_XACT_CLO(arg
, "--expensive-definedness-checks=no",
6265 MC_(clo_expensive_definedness_checks
), EdcNO
) {}
6266 else if VG_XACT_CLO(arg
, "--expensive-definedness-checks=auto",
6267 MC_(clo_expensive_definedness_checks
), EdcAUTO
) {}
6268 else if VG_XACT_CLO(arg
, "--expensive-definedness-checks=yes",
6269 MC_(clo_expensive_definedness_checks
), EdcYES
) {}
6271 else if VG_BOOL_CLO(arg
, "--xtree-leak",
6272 MC_(clo_xtree_leak
)) {}
6273 else if VG_STR_CLO (arg
, "--xtree-leak-file",
6274 MC_(clo_xtree_leak_file
)) {}
6277 return VG_(replacement_malloc_process_cmd_line_option
)(arg
);
6283 VG_(fmsg_bad_option
)(arg
,
6284 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
6288 static void mc_print_usage(void)
6291 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
6292 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
6293 " --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
6294 " [definite,possible]\n"
6295 " --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
6296 " [definite,possible]\n"
6297 " where kind is one of:\n"
6298 " definite indirect possible reachable all none\n"
6299 " --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
6300 " improving leak search false positive [all]\n"
6301 " where heur is one of:\n"
6302 " stdstring length64 newarray multipleinheritance all none\n"
6303 " --show-reachable=yes same as --show-leak-kinds=all\n"
6304 " --show-reachable=no --show-possibly-lost=yes\n"
6305 " same as --show-leak-kinds=definite,possible\n"
6306 " --show-reachable=no --show-possibly-lost=no\n"
6307 " same as --show-leak-kinds=definite\n"
6308 " --xtree-leak=no|yes output leak result in xtree format? [no]\n"
6309 " --xtree-leak-file=<file> xtree leak report file [xtleak.kcg.%%p]\n"
6310 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
6311 " --track-origins=no|yes show origins of undefined values? [no]\n"
6312 " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
6313 " --expensive-definedness-checks=no|auto|yes\n"
6314 " Use extra-precise definedness tracking [auto]\n"
6315 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
6316 " --freelist-big-blocks=<number> releases first blocks with size>= [1000000]\n"
6317 " --workaround-gcc296-bugs=no|yes self explanatory [no]. Deprecated.\n"
6318 " Use --ignore-range-below-sp instead.\n"
6319 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
6320 " --ignore-range-below-sp=<number>-<number> do not report errors for\n"
6321 " accesses at the given offsets below SP\n"
6322 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
6323 " --free-fill=<hexnumber> fill free'd areas with given value\n"
6324 " --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
6325 " stack trace(s) to keep for malloc'd/free'd areas [alloc-and-free]\n"
6326 " --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
6327 " --show-realloc-size-zero=no|yes show realocs with a size of zero? [yes]\n"
6331 static void mc_print_debug_usage(void)
6339 /*------------------------------------------------------------*/
6340 /*--- Client blocks ---*/
6341 /*------------------------------------------------------------*/
6343 /* Client block management:
6345 This is managed as an expanding array of client block descriptors.
6346 Indices of live descriptors are issued to the client, so it can ask
6347 to free them later. Therefore we cannot slide live entries down
6348 over dead ones. Instead we must use free/inuse flags and scan for
6349 an empty slot at allocation time. This in turn means allocation is
6350 relatively expensive, so we hope this does not happen too often.
6352 An unused block has start == size == 0
6355 /* type CGenBlock is defined in mc_include.h */
6357 /* This subsystem is self-initialising. */
6358 static UWord cgb_size
= 0;
6359 static UWord cgb_used
= 0;
6360 static CGenBlock
* cgbs
= NULL
;
6362 /* Stats for this subsystem. */
6363 static ULong cgb_used_MAX
= 0; /* Max in use. */
6364 static ULong cgb_allocs
= 0; /* Number of allocs. */
6365 static ULong cgb_discards
= 0; /* Number of discards. */
6366 static ULong cgb_search
= 0; /* Number of searches. */
6369 /* Get access to the client block array. */
6370 void MC_(get_ClientBlock_array
)( /*OUT*/CGenBlock
** blocks
,
6371 /*OUT*/UWord
* nBlocks
)
6374 *nBlocks
= cgb_used
;
6379 Int
alloc_client_block ( void )
6382 CGenBlock
* cgbs_new
;
6386 for (i
= 0; i
< cgb_used
; i
++) {
6388 if (cgbs
[i
].start
== 0 && cgbs
[i
].size
== 0)
6392 /* Not found. Try to allocate one at the end. */
6393 if (cgb_used
< cgb_size
) {
6398 /* Ok, we have to allocate a new one. */
6399 tl_assert(cgb_used
== cgb_size
);
6400 sz_new
= (cgbs
== NULL
) ? 10 : (2 * cgb_size
);
6402 cgbs_new
= VG_(malloc
)( "mc.acb.1", sz_new
* sizeof(CGenBlock
) );
6403 for (i
= 0; i
< cgb_used
; i
++)
6404 cgbs_new
[i
] = cgbs
[i
];
6412 if (cgb_used
> cgb_used_MAX
)
6413 cgb_used_MAX
= cgb_used
;
6418 static void show_client_block_stats ( void )
6420 VG_(message
)(Vg_DebugMsg
,
6421 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6422 cgb_allocs
, cgb_discards
, cgb_used_MAX
, cgb_search
6425 static void print_monitor_help ( void )
6430 "memcheck monitor commands:\n"
6431 " xb <addr> [<len>]\n"
6432 " prints validity bits for <len> (or 1) bytes at <addr>\n"
6433 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6434 " Then prints the bytes values below the corresponding validity bits\n"
6435 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6436 " Example: xb 0x8049c78 10\n"
6437 " get_vbits <addr> [<len>]\n"
6438 " Similar to xb, but only prints the validity bytes by group of 4.\n"
6439 " make_memory [noaccess|undefined\n"
6440 " |defined|Definedifaddressable] <addr> [<len>]\n"
6441 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6442 " check_memory [addressable|defined] <addr> [<len>]\n"
6443 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6444 " and outputs a description of <addr>\n"
6445 " leak_check [full*|summary|xtleak]\n"
6446 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6447 " [heuristics heur1,heur2,...]\n"
6448 " [new|increased*|changed|any]\n"
6449 " [unlimited*|limited <max_loss_records_output>]\n"
6451 " xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
6452 " where kind is one of:\n"
6453 " definite indirect possible reachable all none\n"
6454 " where heur is one of:\n"
6455 " stdstring length64 newarray multipleinheritance all none*\n"
6456 " Examples: leak_check\n"
6457 " leak_check summary any\n"
6458 " leak_check full kinds indirect,possible\n"
6459 " leak_check full reachable any limited 100\n"
6460 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6461 " [unlimited*|limited <max_blocks>]\n"
6462 " [heuristics heur1,heur2,...]\n"
6463 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6464 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6465 " With heuristics, only shows the blocks found via heur1,heur2,...\n"
6467 " who_points_at <addr> [<len>]\n"
6468 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6469 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6470 " with len > 1, will also show \"interior pointers\")\n"
6471 " xtmemory [<filename>]\n"
6472 " dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
6476 /* Print szB bytes at address, with a format similar to the gdb command
6478 res[i] == 1 indicates the corresponding byte is addressable. */
6479 static void gdb_xb (Addr address
, SizeT szB
, Int res
[])
6483 for (i
= 0; i
< szB
; i
++) {
6487 VG_(printf
) ("\n"); // Terminate previous line
6488 VG_(printf
) ("%p:", (void*)(address
+i
));
6491 VG_(printf
) ("\t0x%02x", *(UChar
*)(address
+i
));
6493 VG_(printf
) ("\t0x??");
6495 VG_(printf
) ("\n"); // Terminate previous line
6499 /* Returns the address of the next non space character,
6500 or address of the string terminator. */
6501 static HChar
* next_non_space (HChar
*s
)
6503 while (*s
&& *s
== ' ')
6508 /* Parse an integer slice, i.e. a single integer or a range of integer.
6510 <integer>[..<integer> ]
6511 (spaces are allowed before and/or after ..).
6512 Return True if range correctly parsed, False otherwise. */
6513 static Bool
VG_(parse_slice
) (HChar
* s
, HChar
** saveptr
,
6514 UInt
*from
, UInt
*to
)
6519 wl
= VG_(strtok_r
) (s
, " ", saveptr
);
6521 /* slice must start with an integer. */
6523 VG_(gdb_printf
) ("expecting integer or slice <from>..<to>\n");
6526 *from
= VG_(strtoull10
) (wl
, &endptr
);
6528 VG_(gdb_printf
) ("invalid integer or slice <from>..<to>\n");
6532 if (*endptr
== '\0' && *next_non_space(*saveptr
) != '.') {
6533 /* wl token is an integer terminating the string
6534 or else next token does not start with .
6535 In both cases, the slice is a single integer. */
6540 if (*endptr
== '\0') {
6541 // iii .. => get the next token
6542 wl
= VG_(strtok_r
) (NULL
, " .", saveptr
);
6545 if (*endptr
!= '.' && *(endptr
+1) != '.') {
6546 VG_(gdb_printf
) ("expecting slice <from>..<to>\n");
6549 if ( *(endptr
+2) == ' ') {
6550 // It must be iii.. jjj => get the next token
6551 wl
= VG_(strtok_r
) (NULL
, " .", saveptr
);
6553 // It must be iii..jjj
6558 *to
= VG_(strtoull10
) (wl
, &endptr
);
6559 if (*endptr
!= '\0') {
6560 VG_(gdb_printf
) ("missing/wrong 'to' of slice <from>..<to>\n");
6565 VG_(gdb_printf
) ("<from> cannot be bigger than <to> "
6566 "in slice <from>..<to>\n");
/* Memcheck's handler for gdbserver monitor commands.  The request string is
   copied into a local buffer for VG_(strtok_r), its first word is matched
   against the command keyword list with VG_(keyword_id), and the switch
   below handles the matched command using the remaining words.
   NOTE(review): this listing omits a number of lines of the function
   (declarations, braces, some statements); the comments added here only
   describe the statements that are visible. */
6573 /* return True if request recognised, False otherwise */
6574 static Bool
handle_gdb_monitor_command (ThreadId tid
, HChar
*req
)
6577 HChar s
[VG_(strlen
)(req
) + 1]; /* copy for strtok_r */
6580 VG_(strcpy
) (s
, req
);
/* First space-delimited word of the copy: the command keyword. */
6582 wcmd
= VG_(strtok_r
) (s
, " ", &ssaveptr
);
6583 /* NB: if possible, avoid introducing a new command below which
6584 starts with the same first letter(s) as an already existing
6585 command. This ensures a shorter abbreviation for the user. */
/* The case numbers used below match the position of each word in this
   list (get_vbits = 1, leak_check = 2, ... xtmemory = 8); the negative
   values are the match-failure results noted on their case labels. */
6586 switch (VG_(keyword_id
)
6587 ("help get_vbits leak_check make_memory check_memory "
6588 "block_list who_points_at xb xtmemory",
6589 wcmd
, kwd_report_duplicated_matches
)) {
6590 case -2: /* multiple matches */
6592 case -1: /* not found */
6595 print_monitor_help();
/* get_vbits: for each byte of the parsed address range, fetch its V bits
   with mc_get_or_set_vbits_for_client (one byte, "get" mode, not flagged
   as a client request) and print them as two hex digits. */
6597 case 1: { /* get_vbits */
6600 if (VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
)) {
6603 Int unaddressable
= 0;
6604 for (i
= 0; i
< szB
; i
++) {
6605 Int res
= mc_get_or_set_vbits_for_client
6606 (address
+i
, (Addr
) &vbits
, 1,
6607 False
, /* get them */
6608 False
/* is client request */ );
6609 /* we are before the first character on next line, print a \n. */
6610 if ((i
% 32) == 0 && i
!= 0)
6612 /* we are before the next block of 4 starts, print a space. */
6613 else if ((i
% 4) == 0 && i
!= 0)
6616 VG_(printf
) ("%02x", vbits
);
/* NOTE(review): the condition choosing between the printf above and the
   assertion below is not visible here; the "xb" arm further down prints
   when the result is 1 and asserts 3 otherwise. */
6618 tl_assert(3 == res
);
/* Report the count accumulated in 'unaddressable', if it is non-zero. */
6624 if (unaddressable
) {
6626 ("Address %p len %lu has %d bytes unaddressable\n",
6627 (void *)address
, szB
, unaddressable
);
/* leak_check: fill a LeakCheckParams with defaults for a monitor-requested
   search, adjust it from the option words that follow, then run
   MC_(detect_memory_leaks). */
6632 case 2: { /* leak_check */
6634 LeakCheckParams lcp
;
6635 HChar
* xt_filename
= NULL
;
6639 lcp
.show_leak_kinds
= R2S(Possible
) | R2S(Unreached
);
6640 lcp
.errors_for_leak_kinds
= 0; // no errors for interactive leak search.
6642 lcp
.deltamode
= LCD_Increased
;
6643 lcp
.max_loss_records_output
= 999999999;
6644 lcp
.requested_by_monitor_command
= True
;
6645 lcp
.xt_filename
= NULL
;
/* Consume the remaining words one at a time; each one is matched against
   the option keyword list below. */
6647 for (kw
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6649 kw
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
)) {
6650 switch (VG_(keyword_id
)
6651 ("full summary xtleak "
6652 "kinds reachable possibleleak definiteleak "
6654 "new increased changed any "
6655 "unlimited limited ",
6656 kw
, kwd_report_all
)) {
/* A failed keyword match is counted in err. */
6657 case -2: err
++; break;
6658 case -1: err
++; break;
6660 lcp
.mode
= LC_Full
; break;
6661 case 1: /* summary */
6662 lcp
.mode
= LC_Summary
; break;
/* xtleak: the expanded file name is handed to the leak search through
   lcp.xt_filename and freed after the search (see the VG_(free) below). */
6663 case 2: /* xtleak */
6666 = VG_(expand_file_name
)("--xtleak-mc_main.c",
6667 "xtleak.kcg.%p.%n");
6668 lcp
.xt_filename
= xt_filename
;
/* kinds: the next word is parsed as a set of leak kinds into
   lcp.show_leak_kinds. */
6670 case 3: { /* kinds */
6671 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6673 || !VG_(parse_enum_set
)(MC_(parse_leak_kinds_tokens
),
6676 &lcp
.show_leak_kinds
)) {
6677 VG_(gdb_printf
) ("missing or malformed leak kinds set\n");
6682 case 4: /* reachable */
6683 lcp
.show_leak_kinds
= MC_(all_Reachedness
)();
6685 case 5: /* possibleleak */
6687 = R2S(Possible
) | R2S(IndirectLeak
) | R2S(Unreached
);
6689 case 6: /* definiteleak */
6690 lcp
.show_leak_kinds
= R2S(Unreached
);
/* heuristics: the next word is parsed as a set of leak heuristics. */
6692 case 7: { /* heuristics */
6693 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6695 || !VG_(parse_enum_set
)(MC_(parse_leak_heuristics_tokens
),
6699 VG_(gdb_printf
) ("missing or malformed heuristics set\n");
6705 lcp
.deltamode
= LCD_New
; break;
6706 case 9: /* increased */
6707 lcp
.deltamode
= LCD_Increased
; break;
6708 case 10: /* changed */
6709 lcp
.deltamode
= LCD_Changed
; break;
6711 lcp
.deltamode
= LCD_Any
; break;
6712 case 12: /* unlimited */
6713 lcp
.max_loss_records_output
= 999999999; break;
/* limited: the next word is read as a decimal integer; a value > 0 becomes
   max_loss_records_output, anything else produces one of the messages
   below. */
6714 case 13: { /* limited */
6716 const HChar
* endptr
;
6718 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6721 endptr
= "empty"; /* to report an error below */
6724 int_value
= VG_(strtoll10
) (wcmd
, &the_end
);
6727 if (*endptr
!= '\0')
6728 VG_(gdb_printf
) ("missing or malformed integer value\n");
6729 else if (int_value
> 0)
6730 lcp
.max_loss_records_output
= (UInt
) int_value
;
6732 VG_(gdb_printf
) ("max_loss_records_output must be >= 1,"
6733 " got %d\n", int_value
);
/* Run the leak search, then release the xtleak file name if one was set. */
6741 MC_(detect_memory_leaks
)(tid
, &lcp
);
6742 if (xt_filename
!= NULL
)
6743 VG_(free
)(xt_filename
);
/* make_memory: the first option word selects the state (noaccess,
   undefined, defined or Definedifaddressable) that is applied to the
   parsed address range. */
6747 case 3: { /* make_memory */
6750 Int kwdid
= VG_(keyword_id
)
6751 ("noaccess undefined defined Definedifaddressable",
6752 VG_(strtok_r
) (NULL
, " ", &ssaveptr
), kwd_report_all
);
6753 if (!VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
))
6758 case 0: MC_(make_mem_noaccess
) (address
, szB
); break;
6759 case 1: make_mem_undefined_w_tid_and_okind ( address
, szB
, tid
,
6760 MC_OKIND_USER
); break;
6761 case 2: MC_(make_mem_defined
) ( address
, szB
); break;
/* (The doubled ';' on the next case is just an extra empty statement.) */
6762 case 3: make_mem_defined_if_addressable ( address
, szB
); break;;
6763 default: tl_assert(0);
/* check_memory: "addressable" or "defined" selects which property of the
   parsed range is checked; the outcome is printed and the address is then
   described with MC_(pp_describe_addr). */
6768 case 4: { /* check_memory */
6776 ExeContext
* origin_ec
;
6779 Int kwdid
= VG_(keyword_id
)
6780 ("addressable defined",
6781 VG_(strtok_r
) (NULL
, " ", &ssaveptr
), kwd_report_all
);
6782 if (!VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
))
6787 case 0: /* addressable */
6788 if (is_mem_addressable ( address
, szB
, &bad_addr
))
6789 VG_(printf
) ("Address %p len %lu addressable\n",
6790 (void *)address
, szB
);
6793 ("Address %p len %lu not addressable:\nbad address %p\n",
6794 (void *)address
, szB
, (void *) bad_addr
);
6795 // Describe this (probably live) address with current epoch
6796 MC_(pp_describe_addr
) (VG_(current_DiEpoch
)(), address
);
6798 case 1: /* defined */
6799 res
= is_mem_defined ( address
, szB
, &bad_addr
, &otag
);
6800 if (MC_AddrErr
== res
)
6802 ("Address %p len %lu not addressable:\nbad address %p\n",
6803 (void *)address
, szB
, (void *) bad_addr
);
/* Value error: 'src' is set to a phrase naming how the undefined value was
   created, chosen by MC_OKIND_* case (the switch expression itself is not
   visible in this listing). */
6804 else if (MC_ValueErr
== res
) {
6807 case MC_OKIND_STACK
:
6808 src
= " was created by a stack allocation"; break;
6810 src
= " was created by a heap allocation"; break;
6812 src
= " was created by a client request"; break;
6813 case MC_OKIND_UNKNOWN
:
6815 default: tl_assert(0);
6818 ("Address %p len %lu not defined:\n"
6819 "Uninitialised value at %p%s\n",
6820 (void *)address
, szB
, (void *) bad_addr
, src
);
/* If ecu is plausible, print the ExeContext it identifies. */
6822 if (VG_(is_plausible_ECU
)(ecu
)) {
6823 origin_ec
= VG_(get_ExeContext_from_ECU
)( ecu
);
6824 VG_(pp_ExeContext
)( origin_ec
);
6828 VG_(printf
) ("Address %p len %lu defined\n",
6829 (void *)address
, szB
);
6830 // Describe this (probably live) address with current epoch
6831 MC_(pp_describe_addr
) (VG_(current_DiEpoch
)(), address
);
6833 default: tl_assert(0);
/* block_list: parse a loss record number or slice with VG_(parse_slice),
   then the optional "unlimited", "limited <n>" and "heuristics <set>"
   words, and print the blocks via MC_(print_block_list). */
6838 case 5: { /* block_list */
6841 UInt lr_nr_from
= 0;
6844 if (VG_(parse_slice
) (NULL
, &ssaveptr
, &lr_nr_from
, &lr_nr_to
)) {
6845 UInt limit_blocks
= 999999999;
6847 UInt heuristics
= 0;
6849 for (wl
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6851 wl
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
)) {
6852 switch (VG_(keyword_id
) ("unlimited limited heuristics ",
6853 wl
, kwd_report_all
)) {
/* A failed keyword match ends the command here, returning True. */
6854 case -2: return True
;
6855 case -1: return True
;
6856 case 0: /* unlimited */
6857 limit_blocks
= 999999999; break;
6858 case 1: /* limited */
6859 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6861 VG_(gdb_printf
) ("missing integer value\n");
6864 int_value
= VG_(strtoll10
) (wcmd
, &the_end
);
6865 if (*the_end
!= '\0') {
6866 VG_(gdb_printf
) ("malformed integer value\n");
6869 if (int_value
<= 0) {
6870 VG_(gdb_printf
) ("max_blocks must be >= 1,"
6871 " got %d\n", int_value
);
6874 limit_blocks
= (UInt
) int_value
;
6876 case 2: /* heuristics */
6877 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6879 || !VG_(parse_enum_set
)(MC_(parse_leak_heuristics_tokens
),
6883 VG_(gdb_printf
) ("missing or malformed heuristics set\n");
6891 /* subtract 1 from lr_nr_from/lr_nr_to as what is shown to the user
6892 is 1 more than the index in lr_array. */
6893 if (lr_nr_from
== 0 || ! MC_(print_block_list
) (lr_nr_from
-1,
6897 VG_(gdb_printf
) ("invalid loss record nr\n");
/* who_points_at: address 0 is refused with a message; otherwise
   MC_(who_points_at) is called on the parsed range. */
6902 case 6: { /* who_points_at */
6906 if (!VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
))
6908 if (address
== (Addr
) 0) {
6909 VG_(gdb_printf
) ("Cannot search who points at 0x0\n");
6912 MC_(who_points_at
) (address
, szB
);
/* NOTE(review): the case label of this arm is not visible in this listing;
   going by the keyword order it is presumably "xb" (index 7).  The visible
   code prints V bits byte by byte and emits gdb_xb output for groups of
   8 bytes. */
6919 if (VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
)) {
6923 Int unaddressable
= 0;
6924 for (i
= 0; i
< szB
; i
++) {
6926 /* We are going to print the first vabits of a new line.
6927 Terminate the previous line if needed: prints a line with the
6928 address and the data. */
6932 gdb_xb (address
+ i
- 8, 8, res
);
6934 VG_(printf
) ("\t"); // To align VABITS with gdb_xb layout
6936 res
[bnr
] = mc_get_or_set_vbits_for_client
6937 (address
+i
, (Addr
) &vbits
[bnr
], 1,
6938 False
, /* get them */
6939 False
/* is client request */ );
/* A result of 1 prints the V bits of this byte; the assertion and the
   "__" placeholder below belong to the other path (its 'else' line is not
   visible here). */
6940 if (res
[bnr
] == 1) {
6941 VG_(printf
) ("\t %02x", vbits
[bnr
]);
6943 tl_assert(3 == res
[bnr
]);
6945 VG_(printf
) ("\t __");
/* Data line for the final group: the last full 8 bytes when szB is a
   non-zero multiple of 8; the second gdb_xb call covers the trailing
   szB % 8 bytes (the line separating the two calls is not visible). */
6949 if (szB
% 8 == 0 && szB
> 0)
6950 gdb_xb (address
+ szB
- 8, 8, res
);
6952 gdb_xb (address
+ szB
- szB
% 8, szB
% 8, res
);
6953 if (unaddressable
) {
6955 ("Address %p len %lu has %d bytes unaddressable\n",
6956 (void *)address
, szB
, unaddressable
);
/* xtmemory: the next word (NULL when absent, as returned by VG_(strtok_r))
   is passed to MC_(xtmemory_report) as the file name. */
6962 case 8: { /* xtmemory */
6964 filename
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6965 MC_(xtmemory_report
)(filename
, False
);
6975 /*------------------------------------------------------------*/
6976 /*--- Client requests ---*/
6977 /*------------------------------------------------------------*/
/* Memcheck's client request handler.  arg[0] is the request code and the
   following arg[] slots carry its arguments; results are handed back to the
   client through *ret.
   NOTE(review): this listing omits a number of lines of the function
   (declarations, braces, break/return statements); the comments added here
   only describe the statements that are visible. */
6979 static Bool
mc_handle_client_request ( ThreadId tid
, UWord
* arg
, UWord
* ret
)
6983 MC_Chunk
* mc
= NULL
;
/* Filter: a request that is not a Memcheck ('M','C') tool request is only
   considered further if it carries one of the request codes listed here.
   The statement guarded by this condition is not visible in this listing. */
6985 if (!VG_IS_TOOL_USERREQ('M','C',arg
[0])
6986 && VG_USERREQ__MALLOCLIKE_BLOCK
!= arg
[0]
6987 && VG_USERREQ__RESIZEINPLACE_BLOCK
!= arg
[0]
6988 && VG_USERREQ__FREELIKE_BLOCK
!= arg
[0]
6989 && VG_USERREQ__CREATE_MEMPOOL
!= arg
[0]
6990 && VG_USERREQ__DESTROY_MEMPOOL
!= arg
[0]
6991 && VG_USERREQ__MEMPOOL_ALLOC
!= arg
[0]
6992 && VG_USERREQ__MEMPOOL_FREE
!= arg
[0]
6993 && VG_USERREQ__MEMPOOL_TRIM
!= arg
[0]
6994 && VG_USERREQ__MOVE_MEMPOOL
!= arg
[0]
6995 && VG_USERREQ__MEMPOOL_CHANGE
!= arg
[0]
6996 && VG_USERREQ__MEMPOOL_EXISTS
!= arg
[0]
6997 && VG_USERREQ__GDB_MONITOR_COMMAND
!= arg
[0]
6998 && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE
!= arg
[0]
6999 && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE
!= arg
[0])
/* Addressability check of the range (arg[1], arg[2]): *ret is 0 when the
   check succeeds, otherwise the bad address reported by
   is_mem_addressable. */
7003 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE
: {
7004 Bool ok
= is_mem_addressable ( arg
[1], arg
[2], &bad_addr
);
7006 MC_(record_user_error
) ( tid
, bad_addr
, /*isAddrErr*/True
, 0 );
7007 *ret
= ok
? (UWord
)NULL
: bad_addr
;
/* Definedness check: is_mem_defined_comprehensive reports a value (V)
   error and an address (A) error separately, each with its own bad
   address; both kinds are recorded as user errors. */
7011 case VG_USERREQ__CHECK_MEM_IS_DEFINED
: {
7012 Bool errorV
= False
;
7015 Bool errorA
= False
;
7017 is_mem_defined_comprehensive(
7019 &errorV
, &bad_addrV
, &otagV
, &errorA
, &bad_addrA
7022 MC_(record_user_error
) ( tid
, bad_addrV
,
7023 /*isAddrErr*/False
, otagV
);
7026 MC_(record_user_error
) ( tid
, bad_addrA
,
7027 /*isAddrErr*/True
, 0 );
7029 /* Return the lower of the two erring addresses, if any. */
7031 if (errorV
&& !errorA
) {
7034 if (!errorV
&& errorA
) {
7037 if (errorV
&& errorA
) {
7038 *ret
= bad_addrV
< bad_addrA
? bad_addrV
: bad_addrA
;
/* Client-requested leak check.  arg[1] selects the search mode (1 gives
   LC_Summary) and arg[2] the delta mode; the leak-kind and heuristics
   settings are taken from the MC_(clo_*) values. */
7043 case VG_USERREQ__DO_LEAK_CHECK
: {
7044 LeakCheckParams lcp
;
7048 else if (arg
[1] == 1)
7049 lcp
.mode
= LC_Summary
;
7051 VG_(message
)(Vg_UserMsg
,
7052 "Warning: unknown memcheck leak search mode\n");
7056 lcp
.show_leak_kinds
= MC_(clo_show_leak_kinds
);
7057 lcp
.errors_for_leak_kinds
= MC_(clo_error_for_leak_kinds
);
7058 lcp
.heuristics
= MC_(clo_leak_check_heuristics
);
/* Delta mode from arg[2]: 1 = increased, 2 = changed, 3 = new; the first
   branch (its condition is not visible) and the unknown-value fallback
   both select LCD_Any. */
7061 lcp
.deltamode
= LCD_Any
;
7062 else if (arg
[2] == 1)
7063 lcp
.deltamode
= LCD_Increased
;
7064 else if (arg
[2] == 2)
7065 lcp
.deltamode
= LCD_Changed
;
7066 else if (arg
[2] == 3)
7067 lcp
.deltamode
= LCD_New
;
7071 "Warning: unknown memcheck leak search deltamode\n");
7072 lcp
.deltamode
= LCD_Any
;
7074 lcp
.max_loss_records_output
= 999999999;
7075 lcp
.requested_by_monitor_command
= False
;
7076 lcp
.xt_filename
= NULL
;
7078 MC_(detect_memory_leaks
)(tid
, &lcp
);
7079 *ret
= 0; /* return value is meaningless */
/* The next four requests set the state of the range (arg[1], arg[2]). */
7083 case VG_USERREQ__MAKE_MEM_NOACCESS
:
7084 MC_(make_mem_noaccess
) ( arg
[1], arg
[2] );
7088 case VG_USERREQ__MAKE_MEM_UNDEFINED
:
7089 make_mem_undefined_w_tid_and_okind ( arg
[1], arg
[2], tid
,
7094 case VG_USERREQ__MAKE_MEM_DEFINED
:
7095 MC_(make_mem_defined
) ( arg
[1], arg
[2] );
7099 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE
:
7100 make_mem_defined_if_addressable ( arg
[1], arg
[2] );
/* For a non-zero start and size, a slot from alloc_client_block() records
   the start, size, a VG_(strdup) copy of the description string in arg[3],
   and the ExeContext of the request. */
7104 case VG_USERREQ__CREATE_BLOCK
: /* describe a block */
7105 if (arg
[1] != 0 && arg
[2] != 0) {
7106 i
= alloc_client_block();
7107 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
7108 cgbs
[i
].start
= arg
[1];
7109 cgbs
[i
].size
= arg
[2];
7110 cgbs
[i
].desc
= VG_(strdup
)("mc.mhcr.1", (HChar
*)arg
[3]);
7111 cgbs
[i
].where
= VG_(record_ExeContext
) ( tid
, 0/*first_ip_delta*/ );
/* arg[2] is the index of the block to discard: an out-of-range index or a
   slot whose start and size are both 0 takes the first path; otherwise the
   slot is zeroed and its description string freed. */
7117 case VG_USERREQ__DISCARD
: /* discard */
7119 || arg
[2] >= cgb_used
||
7120 (cgbs
[arg
[2]].start
== 0 && cgbs
[arg
[2]].size
== 0)) {
/* NOTE(review): arg is a UWord*; if UWord is an unsigned type the
   'arg[2] >= 0' half of this assertion is always true. */
7123 tl_assert(arg
[2] >= 0 && arg
[2] < cgb_used
);
7124 cgbs
[arg
[2]].start
= cgbs
[arg
[2]].size
= 0;
7125 VG_(free
)(cgbs
[arg
[2]].desc
);
/* GET_VBITS / SET_VBITS: both forward arg[1..3] to
   mc_get_or_set_vbits_for_client as a client request and return its result
   in *ret; only the get/set flag differs. */
7131 case VG_USERREQ__GET_VBITS
:
7132 *ret
= mc_get_or_set_vbits_for_client
7133 ( arg
[1], arg
[2], arg
[3],
7134 False
/* get them */,
7135 True
/* is client request */ );
7138 case VG_USERREQ__SET_VBITS
:
7139 *ret
= mc_get_or_set_vbits_for_client
7140 ( arg
[1], arg
[2], arg
[3],
7141 True
/* set them */,
7142 True
/* is client request */ );
/* arg[1..4] are treated as pointers to UWords, which receive the byte
   counts from the last leak check. */
7145 case VG_USERREQ__COUNT_LEAKS
: { /* count leaked bytes */
7146 UWord
** argp
= (UWord
**)arg
;
7147 // MC_(bytes_leaked) et al were set by the last leak check (or zero
7148 // if no prior leak checks performed).
7149 *argp
[1] = MC_(bytes_leaked
) + MC_(bytes_indirect
);
7150 *argp
[2] = MC_(bytes_dubious
);
7151 *argp
[3] = MC_(bytes_reachable
);
7152 *argp
[4] = MC_(bytes_suppressed
);
7153 // there is no argp[5]
7154 //*argp[5] = MC_(bytes_indirect);
7155 // XXX need to make *argp[1-4] defined; currently done in the
7156 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
/* Same layout as COUNT_LEAKS, but reporting block counts. */
7160 case VG_USERREQ__COUNT_LEAK_BLOCKS
: { /* count leaked blocks */
7161 UWord
** argp
= (UWord
**)arg
;
7162 // MC_(blocks_leaked) et al were set by the last leak check (or zero
7163 // if no prior leak checks performed).
7164 *argp
[1] = MC_(blocks_leaked
) + MC_(blocks_indirect
);
7165 *argp
[2] = MC_(blocks_dubious
);
7166 *argp
[3] = MC_(blocks_reachable
);
7167 *argp
[4] = MC_(blocks_suppressed
);
7168 // there is no argp[5]
7169 //*argp[5] = MC_(blocks_indirect);
7170 // XXX need to make *argp[1-4] defined; currently done in the
7171 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
/* Registers the block (p, sizeB) as an MC_AllocCustom allocation in
   MC_(malloc_list) and marks rzB bytes before and after it as no-access.
   The declaration of rzB is not visible in this listing. */
7175 case VG_USERREQ__MALLOCLIKE_BLOCK
: {
7176 Addr p
= (Addr
)arg
[1];
7177 SizeT sizeB
= arg
[2];
7179 Bool is_zeroed
= (Bool
)arg
[4];
7181 MC_(new_block
) ( tid
, p
, sizeB
, /*ignored*/0U, 0U, is_zeroed
,
7182 MC_AllocCustom
, MC_(malloc_list
) );
7184 MC_(make_mem_noaccess
) ( p
- rzB
, rzB
);
7185 MC_(make_mem_noaccess
) ( p
+ sizeB
, rzB
);
7189 case VG_USERREQ__RESIZEINPLACE_BLOCK
: {
7190 Addr p
= (Addr
)arg
[1];
7191 SizeT oldSizeB
= arg
[2];
7192 SizeT newSizeB
= arg
[3];
7195 MC_(handle_resizeInPlace
) ( tid
, p
, oldSizeB
, newSizeB
, rzB
);
7198 case VG_USERREQ__FREELIKE_BLOCK
: {
7199 Addr p
= (Addr
)arg
[1];
7202 MC_(handle_free
) ( tid
, p
, rzB
, MC_AllocCustom
);
/* Note the argument order: arg[2] is the destination and arg[3] the
   source, while MC_(record_overlap_error) is passed src before dst. */
7206 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR
: {
7207 HChar
* s
= (HChar
*)arg
[1];
7208 Addr dst
= (Addr
) arg
[2];
7209 Addr src
= (Addr
) arg
[3];
7210 SizeT len
= (SizeT
)arg
[4];
7211 MC_(record_overlap_error
)(tid
, s
, src
, dst
, len
);
/* arg[1] points to a struct AlignedAllocInfo describing an allocation or
   deallocation call; which checks are made depends on its alloc_kind.
   Throughout, (x & (x - 1)) != 0 detects a value with more than one bit
   set, i.e. not a power of two; zero passes that test, which is why some
   cases test for 0 separately. */
7215 case _VG_USERREQ__MEMCHECK_VERIFY_ALIGNMENT
: {
7216 struct AlignedAllocInfo
*aligned_alloc_info
= (struct AlignedAllocInfo
*)arg
[1];
7217 tl_assert(aligned_alloc_info
);
7219 switch (aligned_alloc_info
->alloc_kind
) {
7220 case AllocKindMemalign
:
7221 // other platforms just ensure it is a power of 2
7222 // ignore Illumos only enforcing multiple of 4 (probably a bug)
7223 if (aligned_alloc_info
->orig_alignment
== 0U ||
7224 (aligned_alloc_info
->orig_alignment
& (aligned_alloc_info
->orig_alignment
- 1)) != 0) {
7225 MC_(record_bad_alignment
) ( tid
, aligned_alloc_info
->orig_alignment
, 0U, " (should be power of 2)" );
7227 // size zero not allowed on all platforms (e.g. Illumos)
7228 if (aligned_alloc_info
->size
== 0) {
7229 MC_(record_bad_size
) ( tid
, aligned_alloc_info
->size
, "memalign()" );
7232 case AllocKindPosixMemalign
:
7233 // must be power of 2
7234 // alignment at least sizeof(size_t)
7235 // size of 0 implementation defined
7236 if (aligned_alloc_info
->orig_alignment
< sizeof(SizeT
) ||
7237 (aligned_alloc_info
->orig_alignment
& (aligned_alloc_info
->orig_alignment
- 1)) != 0) {
7238 MC_(record_bad_alignment
) ( tid
, aligned_alloc_info
->orig_alignment
, 0U, " (should be non-zero, a power of 2 and a multiple of sizeof(void*))" );
7240 if (aligned_alloc_info
->size
== 0) {
7241 MC_(record_bad_size
) ( tid
, aligned_alloc_info
->size
, "posix_memalign()" );
/* Only the power-of-two test is applied to the alignment here, so an
   alignment of 0 is not reported by it; the size-multiple check below is
   skipped for a zero alignment to avoid a modulo by zero. */
7244 case AllocKindAlignedAlloc
:
7245 // must be power of 2
7246 if ((aligned_alloc_info
->orig_alignment
& (aligned_alloc_info
->orig_alignment
- 1)) != 0) {
7247 MC_(record_bad_alignment
) ( tid
, aligned_alloc_info
->orig_alignment
, 0U, " (should be a power of 2)" );
7249 // size should be integral multiple of alignment
7250 if (aligned_alloc_info
->orig_alignment
&&
7251 aligned_alloc_info
->size
% aligned_alloc_info
->orig_alignment
!= 0U) {
7252 MC_(record_bad_alignment
) ( tid
, aligned_alloc_info
->orig_alignment
, aligned_alloc_info
->size
, " (size should be a multiple of alignment)" );
7254 if (aligned_alloc_info
->size
== 0) {
7255 MC_(record_bad_size
) ( tid
, aligned_alloc_info
->size
, "aligned_alloc()" );
/* Sized delete: look the block up in MC_(malloc_list) by address and
   report a mismatch if its recorded size differs from the size passed. */
7258 case AllocKindDeleteSized
:
7259 mc
= VG_(HT_lookup
) ( MC_(malloc_list
), (UWord
)aligned_alloc_info
->mem
);
7260 if (mc
&& mc
->szB
!= aligned_alloc_info
->size
) {
7261 MC_(record_size_mismatch_error
) ( tid
, mc
, aligned_alloc_info
->size
, "new/delete" );
/* NOTE(review): the string "new[][/delete[]" below looks like a typo for
   "new[]/delete[]", which is the spelling used by the sized+aligned array
   case further down. */
7264 case AllocKindVecDeleteSized
:
7265 mc
= VG_(HT_lookup
) ( MC_(malloc_list
), (UWord
)aligned_alloc_info
->mem
);
7266 if (mc
&& mc
->szB
!= aligned_alloc_info
->size
) {
7267 MC_(record_size_mismatch_error
) ( tid
, mc
, aligned_alloc_info
->size
, "new[][/delete[]" );
7270 case AllocKindNewAligned
:
7271 if (aligned_alloc_info
->orig_alignment
== 0 ||
7272 (aligned_alloc_info
->orig_alignment
& (aligned_alloc_info
->orig_alignment
- 1)) != 0) {
7273 MC_(record_bad_alignment
) ( tid
, aligned_alloc_info
->orig_alignment
, 0U, " (should be non-zero and a power of 2)" );
7276 case AllocKindVecNewAligned
:
7277 if (aligned_alloc_info
->orig_alignment
== 0 ||
7278 (aligned_alloc_info
->orig_alignment
& (aligned_alloc_info
->orig_alignment
- 1)) != 0) {
7279 MC_(record_bad_alignment
) ( tid
, aligned_alloc_info
->orig_alignment
, 0U, " (should be non-zero and a power of 2)" );
/* Aligned delete: validate the alignment value, then compare it with the
   alignment recorded for the block (mc->alignB). */
7282 case AllocKindDeleteAligned
:
7283 if (aligned_alloc_info
->orig_alignment
== 0 ||
7284 (aligned_alloc_info
->orig_alignment
& (aligned_alloc_info
->orig_alignment
- 1)) != 0) {
7285 MC_(record_bad_alignment
) ( tid
, aligned_alloc_info
->orig_alignment
, 0U, " (should be non-zero and a power of 2)" );
7287 mc
= VG_(HT_lookup
) ( MC_(malloc_list
), (UWord
)aligned_alloc_info
->mem
);
7288 if (mc
&& aligned_alloc_info
->orig_alignment
!= mc
->alignB
) {
7289 MC_(record_align_mismatch_error
) ( tid
, mc
, aligned_alloc_info
->orig_alignment
, "new/delete");
7292 case AllocKindVecDeleteAligned
:
7293 if (aligned_alloc_info
->orig_alignment
== 0 ||
7294 (aligned_alloc_info
->orig_alignment
& (aligned_alloc_info
->orig_alignment
- 1)) != 0) {
7295 MC_(record_bad_alignment
) ( tid
, aligned_alloc_info
->orig_alignment
, 0U, " (should be non-zero and a power of 2)" );
7297 mc
= VG_(HT_lookup
) ( MC_(malloc_list
), (UWord
)aligned_alloc_info
->mem
);
7298 if (mc
&& aligned_alloc_info
->orig_alignment
!= mc
->alignB
) {
7299 MC_(record_align_mismatch_error
) ( tid
, mc
, aligned_alloc_info
->orig_alignment
, "new[]/delete[]");
/* Sized and aligned delete: both the size and the alignment are compared
   with the recorded block, then the alignment value itself is validated. */
7302 case AllocKindDeleteSizedAligned
:
7303 mc
= VG_(HT_lookup
) ( MC_(malloc_list
), (UWord
)aligned_alloc_info
->mem
);
7304 if (mc
&& mc
->szB
!= aligned_alloc_info
->size
) {
7305 MC_(record_size_mismatch_error
) ( tid
, mc
, aligned_alloc_info
->size
, "new/delete");
7307 if (mc
&& aligned_alloc_info
->orig_alignment
!= mc
->alignB
) {
7308 MC_(record_align_mismatch_error
) ( tid
, mc
, aligned_alloc_info
->orig_alignment
, "new/delete");
7310 if (aligned_alloc_info
->orig_alignment
== 0 ||
7311 (aligned_alloc_info
->orig_alignment
& (aligned_alloc_info
->orig_alignment
- 1)) != 0) {
7312 MC_(record_bad_alignment
) ( tid
, aligned_alloc_info
->orig_alignment
, 0U, " (should be non-zero and a power of 2)" );
7315 case AllocKindVecDeleteSizedAligned
:
7316 mc
= VG_(HT_lookup
) ( MC_(malloc_list
), (UWord
)aligned_alloc_info
->mem
);
7317 if (mc
&& mc
->szB
!= aligned_alloc_info
->size
) {
7318 MC_(record_size_mismatch_error
) ( tid
, mc
, aligned_alloc_info
->size
, "new[]/delete[]" );
7320 if (mc
&& aligned_alloc_info
->orig_alignment
!= mc
->alignB
) {
7321 MC_(record_align_mismatch_error
) ( tid
, mc
, aligned_alloc_info
->orig_alignment
, "new[]/delete[]");
7323 if (aligned_alloc_info
->orig_alignment
== 0 ||
7324 (aligned_alloc_info
->orig_alignment
& (aligned_alloc_info
->orig_alignment
- 1)) != 0) {
7325 MC_(record_bad_alignment
) ( tid
, aligned_alloc_info
->orig_alignment
, 0U, " (should be non-zero and a power of 2)" );
/* Memory pool requests: each unpacks its arguments from arg[] and forwards
   them to the matching MC_(...) mempool function.  Declarations of some
   locals used below (rzB, size) are not visible in this listing. */
7335 case VG_USERREQ__CREATE_MEMPOOL
: {
7336 Addr pool
= (Addr
)arg
[1];
7338 Bool is_zeroed
= (Bool
)arg
[3];
7339 UInt flags
= arg
[4];
7341 // The create_mempool function does not know these mempool flags,
7342 // pass as booleans.
7343 MC_(create_mempool
) ( pool
, rzB
, is_zeroed
,
7344 (flags
& VALGRIND_MEMPOOL_AUTO_FREE
),
7345 (flags
& VALGRIND_MEMPOOL_METAPOOL
) );
7349 case VG_USERREQ__DESTROY_MEMPOOL
: {
7350 Addr pool
= (Addr
)arg
[1];
7352 MC_(destroy_mempool
) ( pool
);
7356 case VG_USERREQ__MEMPOOL_ALLOC
: {
7357 Addr pool
= (Addr
)arg
[1];
7358 Addr addr
= (Addr
)arg
[2];
7361 MC_(mempool_alloc
) ( tid
, pool
, addr
, size
);
7365 case VG_USERREQ__MEMPOOL_FREE
: {
7366 Addr pool
= (Addr
)arg
[1];
7367 Addr addr
= (Addr
)arg
[2];
7369 MC_(mempool_free
) ( pool
, addr
);
7373 case VG_USERREQ__MEMPOOL_TRIM
: {
7374 Addr pool
= (Addr
)arg
[1];
7375 Addr addr
= (Addr
)arg
[2];
7378 MC_(mempool_trim
) ( pool
, addr
, size
);
7382 case VG_USERREQ__MOVE_MEMPOOL
: {
7383 Addr poolA
= (Addr
)arg
[1];
7384 Addr poolB
= (Addr
)arg
[2];
7386 MC_(move_mempool
) ( poolA
, poolB
);
7390 case VG_USERREQ__MEMPOOL_CHANGE
: {
7391 Addr pool
= (Addr
)arg
[1];
7392 Addr addrA
= (Addr
)arg
[2];
7393 Addr addrB
= (Addr
)arg
[3];
7396 MC_(mempool_change
) ( pool
, addrA
, addrB
, size
);
7400 case VG_USERREQ__MEMPOOL_EXISTS
: {
7401 Addr pool
= (Addr
)arg
[1];
7403 *ret
= (UWord
) MC_(mempool_exists
) ( pool
);
/* arg[1] is the monitor command string, handed to
   handle_gdb_monitor_command. */
7407 case VG_USERREQ__GDB_MONITOR_COMMAND
: {
7408 Bool handled
= handle_gdb_monitor_command (tid
, (HChar
*)arg
[1]);
/* Both requests share one arm: the comparison below is true for the
   DISABLE request, and (presumably as addRange; the left-hand side is not
   visible) it tells modify_ignore_ranges what to do with the range
   (arg[1], arg[2]). */
7416 case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE
:
7417 case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE
: {
7419 = arg
[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE
;
7421 = modify_ignore_ranges(addRange
, arg
[1], arg
[2]);
/* Message text for a request code that none of the cases handled. */
7429 "Warning: unknown memcheck client request code %llx\n",
7438 /*------------------------------------------------------------*/
7439 /*--- Crude profiling machinery. ---*/
7440 /*------------------------------------------------------------*/
7442 // We track a number of interesting events (using PROF_EVENT)
7443 // if MC_PROFILE_MEMORY is defined.
7445 #ifdef MC_PROFILE_MEMORY
/* One counter per profiling event, indexed by event number
   (0 .. MCPE_LAST-1); zeroed by init_prof_mem and printed by
   done_prof_mem. */
7447 ULong
MC_(event_ctr
)[MCPE_LAST
];
7449 /* Event counter names. Use the name of the function that increases the
7450 event counter. Drop any MC_() and mc_ prefixes. */
/* The table uses designated initialisers keyed by the MCPE_* event numbers,
   so entries can be listed in any order; init_prof_mem checks that the
   slots hold non-NULL names. */
7451 static const HChar
* MC_(event_ctr_name
)[MCPE_LAST
] = {
7452 [MCPE_LOADVN_SLOW
] = "LOADVn_slow",
7453 [MCPE_LOADVN_SLOW_LOOP
] = "LOADVn_slow_loop",
7454 [MCPE_STOREVN_SLOW
] = "STOREVn_slow",
7455 [MCPE_STOREVN_SLOW_LOOP
] = "STOREVn_slow(loop)",
7456 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED
] = "make_aligned_word32_undefined",
7457 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW
] =
7458 "make_aligned_word32_undefined_slow",
7459 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED
] = "make_aligned_word64_undefined",
7460 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW
] =
7461 "make_aligned_word64_undefined_slow",
7462 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS
] = "make_aligned_word32_noaccess",
7463 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW
] =
7464 "make_aligned_word32_noaccess_slow",
7465 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS
] = "make_aligned_word64_noaccess",
7466 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW
] =
7467 "make_aligned_word64_noaccess_slow",
7468 [MCPE_MAKE_MEM_NOACCESS
] = "make_mem_noaccess",
7469 [MCPE_MAKE_MEM_UNDEFINED
] = "make_mem_undefined",
7470 [MCPE_MAKE_MEM_UNDEFINED_W_OTAG
] = "make_mem_undefined_w_otag",
7471 [MCPE_MAKE_MEM_DEFINED
] = "make_mem_defined",
7472 [MCPE_CHEAP_SANITY_CHECK
] = "cheap_sanity_check",
7473 [MCPE_EXPENSIVE_SANITY_CHECK
] = "expensive_sanity_check",
7474 [MCPE_COPY_ADDRESS_RANGE_STATE
] = "copy_address_range_state",
7475 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1
] = "copy_address_range_state(loop1)",
7476 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2
] = "copy_address_range_state(loop2)",
7477 [MCPE_CHECK_MEM_IS_NOACCESS
] = "check_mem_is_noaccess",
7478 [MCPE_CHECK_MEM_IS_NOACCESS_LOOP
] = "check_mem_is_noaccess(loop)",
7479 [MCPE_IS_MEM_ADDRESSABLE
] = "is_mem_addressable",
7480 [MCPE_IS_MEM_ADDRESSABLE_LOOP
] = "is_mem_addressable(loop)",
7481 [MCPE_IS_MEM_DEFINED
] = "is_mem_defined",
7482 [MCPE_IS_MEM_DEFINED_LOOP
] = "is_mem_defined(loop)",
7483 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE
] = "is_mem_defined_comprehensive",
7484 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP
] =
7485 "is_mem_defined_comprehensive(loop)",
7486 [MCPE_IS_DEFINED_ASCIIZ
] = "is_defined_asciiz",
7487 [MCPE_IS_DEFINED_ASCIIZ_LOOP
] = "is_defined_asciiz(loop)",
7488 [MCPE_FIND_CHUNK_FOR_OLD
] = "find_chunk_for_OLD",
7489 [MCPE_FIND_CHUNK_FOR_OLD_LOOP
] = "find_chunk_for_OLD(loop)",
7490 [MCPE_SET_ADDRESS_RANGE_PERMS
] = "set_address_range_perms",
7491 [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP
] =
7492 "set_address_range_perms(single-secmap)",
7493 [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP
] =
7494 "set_address_range_perms(startof-secmap)",
7495 [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS
] =
7496 "set_address_range_perms(multiple-secmaps)",
7497 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1
] =
7498 "set_address_range_perms(dist-sm1)",
7499 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2
] =
7500 "set_address_range_perms(dist-sm2)",
7501 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK
] =
7502 "set_address_range_perms(dist-sm1-quick)",
7503 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK
] =
7504 "set_address_range_perms(dist-sm2-quick)",
7505 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A
] = "set_address_range_perms(loop1a)",
7506 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B
] = "set_address_range_perms(loop1b)",
7507 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C
] = "set_address_range_perms(loop1c)",
7508 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A
] = "set_address_range_perms(loop8a)",
7509 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B
] = "set_address_range_perms(loop8b)",
7510 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K
] = "set_address_range_perms(loop64K)",
7511 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM
] =
7512 "set_address_range_perms(loop64K-free-dist-sm)",
7513 [MCPE_LOADV_128_OR_256_SLOW_LOOP
] = "LOADV_128_or_256_slow(loop)",
7514 [MCPE_LOADV_128_OR_256
] = "LOADV_128_or_256",
7515 [MCPE_LOADV_128_OR_256_SLOW1
] = "LOADV_128_or_256-slow1",
7516 [MCPE_LOADV_128_OR_256_SLOW2
] = "LOADV_128_or_256-slow2",
7517 [MCPE_LOADV64
] = "LOADV64",
7518 [MCPE_LOADV64_SLOW1
] = "LOADV64-slow1",
7519 [MCPE_LOADV64_SLOW2
] = "LOADV64-slow2",
7520 [MCPE_STOREV64
] = "STOREV64",
7521 [MCPE_STOREV64_SLOW1
] = "STOREV64-slow1",
7522 [MCPE_STOREV64_SLOW2
] = "STOREV64-slow2",
7523 [MCPE_STOREV64_SLOW3
] = "STOREV64-slow3",
7524 [MCPE_STOREV64_SLOW4
] = "STOREV64-slow4",
7525 [MCPE_LOADV32
] = "LOADV32",
7526 [MCPE_LOADV32_SLOW1
] = "LOADV32-slow1",
7527 [MCPE_LOADV32_SLOW2
] = "LOADV32-slow2",
7528 [MCPE_STOREV32
] = "STOREV32",
7529 [MCPE_STOREV32_SLOW1
] = "STOREV32-slow1",
7530 [MCPE_STOREV32_SLOW2
] = "STOREV32-slow2",
7531 [MCPE_STOREV32_SLOW3
] = "STOREV32-slow3",
7532 [MCPE_STOREV32_SLOW4
] = "STOREV32-slow4",
7533 [MCPE_LOADV16
] = "LOADV16",
7534 [MCPE_LOADV16_SLOW1
] = "LOADV16-slow1",
7535 [MCPE_LOADV16_SLOW2
] = "LOADV16-slow2",
7536 [MCPE_STOREV16
] = "STOREV16",
7537 [MCPE_STOREV16_SLOW1
] = "STOREV16-slow1",
7538 [MCPE_STOREV16_SLOW2
] = "STOREV16-slow2",
7539 [MCPE_STOREV16_SLOW3
] = "STOREV16-slow3",
7540 [MCPE_STOREV16_SLOW4
] = "STOREV16-slow4",
7541 [MCPE_LOADV8
] = "LOADV8",
7542 [MCPE_LOADV8_SLOW1
] = "LOADV8-slow1",
7543 [MCPE_LOADV8_SLOW2
] = "LOADV8-slow2",
7544 [MCPE_STOREV8
] = "STOREV8",
7545 [MCPE_STOREV8_SLOW1
] = "STOREV8-slow1",
7546 [MCPE_STOREV8_SLOW2
] = "STOREV8-slow2",
7547 [MCPE_STOREV8_SLOW3
] = "STOREV8-slow3",
7548 [MCPE_STOREV8_SLOW4
] = "STOREV8-slow4",
7549 [MCPE_NEW_MEM_STACK_4
] = "new_mem_stack_4",
7550 [MCPE_NEW_MEM_STACK_8
] = "new_mem_stack_8",
7551 [MCPE_NEW_MEM_STACK_12
] = "new_mem_stack_12",
7552 [MCPE_NEW_MEM_STACK_16
] = "new_mem_stack_16",
7553 [MCPE_NEW_MEM_STACK_32
] = "new_mem_stack_32",
7554 [MCPE_NEW_MEM_STACK_112
] = "new_mem_stack_112",
7555 [MCPE_NEW_MEM_STACK_128
] = "new_mem_stack_128",
7556 [MCPE_NEW_MEM_STACK_144
] = "new_mem_stack_144",
7557 [MCPE_NEW_MEM_STACK_160
] = "new_mem_stack_160",
7558 [MCPE_DIE_MEM_STACK_4
] = "die_mem_stack_4",
7559 [MCPE_DIE_MEM_STACK_8
] = "die_mem_stack_8",
7560 [MCPE_DIE_MEM_STACK_12
] = "die_mem_stack_12",
7561 [MCPE_DIE_MEM_STACK_16
] = "die_mem_stack_16",
7562 [MCPE_DIE_MEM_STACK_32
] = "die_mem_stack_32",
7563 [MCPE_DIE_MEM_STACK_112
] = "die_mem_stack_112",
7564 [MCPE_DIE_MEM_STACK_128
] = "die_mem_stack_128",
7565 [MCPE_DIE_MEM_STACK_144
] = "die_mem_stack_144",
7566 [MCPE_DIE_MEM_STACK_160
] = "die_mem_stack_160",
7567 [MCPE_NEW_MEM_STACK
] = "new_mem_stack",
7568 [MCPE_DIE_MEM_STACK
] = "die_mem_stack",
7569 [MCPE_MAKE_STACK_UNINIT_W_O
] = "MAKE_STACK_UNINIT_w_o",
7570 [MCPE_MAKE_STACK_UNINIT_NO_O
] = "MAKE_STACK_UNINIT_no_o",
7571 [MCPE_MAKE_STACK_UNINIT_128_NO_O
] = "MAKE_STACK_UNINIT_128_no_o",
7572 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16
]
7573 = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
7574 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8
]
7575 = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
7576 [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE
]
7577 = "MAKE_STACK_UNINIT_128_no_o_slowcase",
/* Profiling set-up: zeroes every event counter and checks the name table.
   name_count is compared against MCPE_LAST at the end, so a table slot
   without a name trips the assertion.
   NOTE(review): the statement executed when a name is non-NULL is not
   visible in this listing; presumably it increments name_count. */
7580 static void init_prof_mem ( void )
7582 Int i
, name_count
= 0;
7584 for (i
= 0; i
< MCPE_LAST
; i
++) {
7585 MC_(event_ctr
)[i
] = 0;
7586 if (MC_(event_ctr_name
)[i
] != NULL
)
7590 /* Make sure every profiling event has a name */
7591 tl_assert(name_count
== MCPE_LAST
);
/* Profiling report: walks all events and, for each one whose counter is
   greater than zero, prints its number, its count and its name.
   NOTE(review): the bodies of the 'spaced' / (n % 10) test and the
   updates of n and spaced are not visible in this listing; presumably
   they insert a separator between groups of ten printed lines. */
7594 static void done_prof_mem ( void )
7597 Bool spaced
= False
;
7598 for (i
= n
= 0; i
< MCPE_LAST
; i
++) {
7599 if (!spaced
&& (n
% 10) == 0) {
7603 if (MC_(event_ctr
)[i
] > 0) {
7606 VG_(printf
)( "prof mem event %3d: %11llu %s\n",
7607 i
, MC_(event_ctr
)[i
],
7608 MC_(event_ctr_name
)[i
]);
/* Empty variant of init_prof_mem: does nothing.
   NOTE(review): presumably the version compiled when MC_PROFILE_MEMORY is
   not defined; the #else line is not visible in this listing. */
7615 static void init_prof_mem ( void ) { }
/* Empty variant of done_prof_mem: does nothing.
   NOTE(review): presumably the version compiled when MC_PROFILE_MEMORY is
   not defined; the #else line is not visible in this listing. */
7616 static void done_prof_mem ( void ) { }
7621 /*------------------------------------------------------------*/
7622 /*--- Origin tracking stuff ---*/
7623 /*------------------------------------------------------------*/
7625 /*--------------------------------------------*/
7626 /*--- Origin tracking: load handlers ---*/
7627 /*--------------------------------------------*/
7629 static INLINE UInt
merge_origins ( UInt or1
, UInt or2
) {
7630 return or1
> or2
? or1
: or2
;
/* Origin-tracking load helper for a 1-byte access at address a.
   It finds the origin cache line for a, reads the descriptor of the 32-bit
   word selected by oc_line_offset(a), and tests the descriptor bit for the
   byte (bit number a & 3).  The return visible below yields the 32-bit
   origin tag stored for that word.
   NOTE(review): this listing omits some lines (local declarations and the
   statement between the bit test and the final return); compare
   helperc_b_load8, whose zero-descriptor path returns 0. */
7633 UWord
VG_REGPARM(1) MC_(helperc_b_load1
)( Addr a
) {
7636 UWord lineoff
= oc_line_offset(a
);
7637 UWord byteoff
= a
& 3; /* 0, 1, 2 or 3 */
/* lineoff is a UWord; if that type is unsigned, the '>= 0' half of this
   assertion is trivially true and only the upper bound is checked. */
7639 if (OC_ENABLE_ASSERTIONS
) {
7640 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
7643 line
= find_OCacheLine( a
);
7645 descr
= line
->u
.main
.descr
[lineoff
];
/* Only the low four bits of a descriptor may be set. */
7646 if (OC_ENABLE_ASSERTIONS
) {
7647 tl_assert(descr
< 0x10);
7650 if (LIKELY(0 == (descr
& (1 << byteoff
)))) {
7653 return line
->u
.main
.w32
[lineoff
];
/* Origin-tracking load helper for a 2-byte access at address a.
   An odd address is handled by merging the results of two 1-byte loads.
   Otherwise the descriptor of the containing 32-bit word is tested with the
   two-bit mask 3 << (a & 3), covering both bytes of the access, and the
   return visible below yields the word's stored origin tag.
   NOTE(review): this listing omits some lines (local declarations and the
   statement between the mask test and the final return). */
7657 UWord
VG_REGPARM(1) MC_(helperc_b_load2
)( Addr a
) {
7660 UWord lineoff
, byteoff
;
7662 if (UNLIKELY(a
& 1)) {
7663 /* Handle misaligned case, slowly. */
7664 UInt oLo
= (UInt
)MC_(helperc_b_load1
)( a
+ 0 );
7665 UInt oHi
= (UInt
)MC_(helperc_b_load1
)( a
+ 1 );
7666 return merge_origins(oLo
, oHi
);
7669 lineoff
= oc_line_offset(a
);
7670 byteoff
= a
& 3; /* 0 or 2 */
7672 if (OC_ENABLE_ASSERTIONS
) {
7673 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
7675 line
= find_OCacheLine( a
);
7677 descr
= line
->u
.main
.descr
[lineoff
];
7678 if (OC_ENABLE_ASSERTIONS
) {
7679 tl_assert(descr
< 0x10);
7682 if (LIKELY(0 == (descr
& (3 << byteoff
)))) {
7685 return line
->u
.main
.w32
[lineoff
];
/* Origin-tracking load helper for a 4-byte access at address a.
   An address that is not 4-aligned is handled by merging the results of two
   2-byte loads at a and a + 2.  Otherwise the whole descriptor of the
   32-bit word is tested against zero, and the return visible below yields
   the word's stored origin tag.
   NOTE(review): this listing omits some lines (local declarations and the
   statement between the descriptor test and the final return). */
7689 UWord
VG_REGPARM(1) MC_(helperc_b_load4
)( Addr a
) {
7694 if (UNLIKELY(a
& 3)) {
7695 /* Handle misaligned case, slowly. */
7696 UInt oLo
= (UInt
)MC_(helperc_b_load2
)( a
+ 0 );
7697 UInt oHi
= (UInt
)MC_(helperc_b_load2
)( a
+ 2 );
7698 return merge_origins(oLo
, oHi
);
7701 lineoff
= oc_line_offset(a
);
7702 if (OC_ENABLE_ASSERTIONS
) {
7703 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
7706 line
= find_OCacheLine( a
);
7708 descr
= line
->u
.main
.descr
[lineoff
];
7709 if (OC_ENABLE_ASSERTIONS
) {
7710 tl_assert(descr
< 0x10);
7713 if (LIKELY(0 == descr
)) {
7716 return line
->u
.main
.w32
[lineoff
];
7720 UWord
VG_REGPARM(1) MC_(helperc_b_load8
)( Addr a
) {
7722 UChar descrLo
, descrHi
, descr
;
7725 if (UNLIKELY(a
& 7)) {
7726 /* Handle misaligned case, slowly. */
7727 UInt oLo
= (UInt
)MC_(helperc_b_load4
)( a
+ 0 );
7728 UInt oHi
= (UInt
)MC_(helperc_b_load4
)( a
+ 4 );
7729 return merge_origins(oLo
, oHi
);
7732 lineoff
= oc_line_offset(a
);
7733 if (OC_ENABLE_ASSERTIONS
) {
7734 tl_assert(lineoff
== (lineoff
& 6)); /*0,2,4,6*//*since 8-aligned*/
7737 line
= find_OCacheLine( a
);
7739 descrLo
= line
->u
.main
.descr
[lineoff
+ 0];
7740 descrHi
= line
->u
.main
.descr
[lineoff
+ 1];
7741 descr
= descrLo
| descrHi
;
7742 if (OC_ENABLE_ASSERTIONS
) {
7743 tl_assert(descr
< 0x10);
7746 if (LIKELY(0 == descr
)) {
7747 return 0; /* both 32-bit chunks are defined */
7749 UInt oLo
= descrLo
== 0 ? 0 : line
->u
.main
.w32
[lineoff
+ 0];
7750 UInt oHi
= descrHi
== 0 ? 0 : line
->u
.main
.w32
[lineoff
+ 1];
7751 return merge_origins(oLo
, oHi
);
7755 UWord
VG_REGPARM(1) MC_(helperc_b_load16
)( Addr a
) {
7756 UInt oLo
= (UInt
)MC_(helperc_b_load8
)( a
+ 0 );
7757 UInt oHi
= (UInt
)MC_(helperc_b_load8
)( a
+ 8 );
7758 UInt oBoth
= merge_origins(oLo
, oHi
);
7759 return (UWord
)oBoth
;
7762 UWord
VG_REGPARM(1) MC_(helperc_b_load32
)( Addr a
) {
7763 UInt oQ0
= (UInt
)MC_(helperc_b_load8
)( a
+ 0 );
7764 UInt oQ1
= (UInt
)MC_(helperc_b_load8
)( a
+ 8 );
7765 UInt oQ2
= (UInt
)MC_(helperc_b_load8
)( a
+ 16 );
7766 UInt oQ3
= (UInt
)MC_(helperc_b_load8
)( a
+ 24 );
7767 UInt oAll
= merge_origins(merge_origins(oQ0
, oQ1
),
7768 merge_origins(oQ2
, oQ3
));
7773 /*--------------------------------------------*/
7774 /*--- Origin tracking: store handlers ---*/
7775 /*--------------------------------------------*/
7777 void VG_REGPARM(2) MC_(helperc_b_store1
)( Addr a
, UWord d32
) {
7779 UWord lineoff
= oc_line_offset(a
);
7780 UWord byteoff
= a
& 3; /* 0, 1, 2 or 3 */
7782 if (OC_ENABLE_ASSERTIONS
) {
7783 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
7786 line
= find_OCacheLine( a
);
7788 #if OC_PRECISION_STORE
7789 if (LIKELY(d32
== 0)) {
7790 // The byte is defined. Just mark it as so in the descr and leave the w32
7791 // unchanged. This may make the descr become zero, so the line no longer
7792 // contains useful info, but that's OK. No loss of information.
7793 line
->u
.main
.descr
[lineoff
] &= ~(1 << byteoff
);
7794 } else if (d32
== line
->u
.main
.w32
[lineoff
]) {
7795 // At least one of the four bytes in the w32 is undefined with the same
7796 // origin. Just extend the mask. No loss of information.
7797 line
->u
.main
.descr
[lineoff
] |= (1 << byteoff
);
7799 // Here, we have a conflict: at least one byte in the group is undefined
7800 // but with some other origin. We can't represent both origins, so we
7801 // forget about the previous origin and install this one instead.
7802 line
->u
.main
.descr
[lineoff
] = (1 << byteoff
);
7803 line
->u
.main
.w32
[lineoff
] = d32
;
7807 line
->u
.main
.descr
[lineoff
] &= ~(1 << byteoff
);
7809 line
->u
.main
.descr
[lineoff
] |= (1 << byteoff
);
7810 line
->u
.main
.w32
[lineoff
] = d32
;
7815 void VG_REGPARM(2) MC_(helperc_b_store2
)( Addr a
, UWord d32
) {
7817 UWord lineoff
, byteoff
;
7819 if (UNLIKELY(a
& 1)) {
7820 /* Handle misaligned case, slowly. */
7821 MC_(helperc_b_store1
)( a
+ 0, d32
);
7822 MC_(helperc_b_store1
)( a
+ 1, d32
);
7826 lineoff
= oc_line_offset(a
);
7827 byteoff
= a
& 3; /* 0 or 2 */
7829 if (OC_ENABLE_ASSERTIONS
) {
7830 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
7833 line
= find_OCacheLine( a
);
7835 #if OC_PRECISION_STORE
7836 // Same logic as in the store1 case above.
7837 if (LIKELY(d32
== 0)) {
7838 line
->u
.main
.descr
[lineoff
] &= ~(3 << byteoff
);
7839 } else if (d32
== line
->u
.main
.w32
[lineoff
]) {
7840 line
->u
.main
.descr
[lineoff
] |= (3 << byteoff
);
7841 line
->u
.main
.w32
[lineoff
] = d32
;
7843 line
->u
.main
.descr
[lineoff
] = (3 << byteoff
);
7844 line
->u
.main
.w32
[lineoff
] = d32
;
7848 line
->u
.main
.descr
[lineoff
] &= ~(3 << byteoff
);
7850 line
->u
.main
.descr
[lineoff
] |= (3 << byteoff
);
7851 line
->u
.main
.w32
[lineoff
] = d32
;
7856 void VG_REGPARM(2) MC_(helperc_b_store4
)( Addr a
, UWord d32
) {
7860 if (UNLIKELY(a
& 3)) {
7861 /* Handle misaligned case, slowly. */
7862 MC_(helperc_b_store2
)( a
+ 0, d32
);
7863 MC_(helperc_b_store2
)( a
+ 2, d32
);
7867 lineoff
= oc_line_offset(a
);
7868 if (OC_ENABLE_ASSERTIONS
) {
7869 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
7872 line
= find_OCacheLine( a
);
7875 line
->u
.main
.descr
[lineoff
] = 0;
7877 line
->u
.main
.descr
[lineoff
] = 0xF;
7878 line
->u
.main
.w32
[lineoff
] = d32
;
7882 void VG_REGPARM(2) MC_(helperc_b_store8
)( Addr a
, UWord d32
) {
7883 STATIC_ASSERT(OC_W32S_PER_LINE
== 8);
7887 if (UNLIKELY(a
& 7)) {
7888 /* Handle misaligned case, slowly. */
7889 MC_(helperc_b_store4
)( a
+ 0, d32
);
7890 MC_(helperc_b_store4
)( a
+ 4, d32
);
7894 lineoff
= oc_line_offset(a
);
7895 if (OC_ENABLE_ASSERTIONS
) {
7896 tl_assert(lineoff
== (lineoff
& 6)); /*0,2,4,6*//*since 8-aligned*/
7899 line
= find_OCacheLine( a
);
7902 line
->u
.main
.descr
[lineoff
+ 0] = 0;
7903 line
->u
.main
.descr
[lineoff
+ 1] = 0;
7905 line
->u
.main
.descr
[lineoff
+ 0] = 0xF;
7906 line
->u
.main
.descr
[lineoff
+ 1] = 0xF;
7907 line
->u
.main
.w32
[lineoff
+ 0] = d32
;
7908 line
->u
.main
.w32
[lineoff
+ 1] = d32
;
7912 void VG_REGPARM(2) MC_(helperc_b_store16
)( Addr a
, UWord d32
) {
7913 STATIC_ASSERT(OC_W32S_PER_LINE
== 8);
7917 if (UNLIKELY(a
& 15)) {
7918 /* Handle misaligned case, slowly. */
7919 MC_(helperc_b_store8
)( a
+ 0, d32
);
7920 MC_(helperc_b_store8
)( a
+ 8, d32
);
7924 lineoff
= oc_line_offset(a
);
7925 if (OC_ENABLE_ASSERTIONS
) {
7926 tl_assert(lineoff
== (lineoff
& 4)); /*0,4*//*since 16-aligned*/
7929 line
= find_OCacheLine( a
);
7932 line
->u
.main
.descr
[lineoff
+ 0] = 0;
7933 line
->u
.main
.descr
[lineoff
+ 1] = 0;
7934 line
->u
.main
.descr
[lineoff
+ 2] = 0;
7935 line
->u
.main
.descr
[lineoff
+ 3] = 0;
7937 line
->u
.main
.descr
[lineoff
+ 0] = 0xF;
7938 line
->u
.main
.descr
[lineoff
+ 1] = 0xF;
7939 line
->u
.main
.descr
[lineoff
+ 2] = 0xF;
7940 line
->u
.main
.descr
[lineoff
+ 3] = 0xF;
7941 line
->u
.main
.w32
[lineoff
+ 0] = d32
;
7942 line
->u
.main
.w32
[lineoff
+ 1] = d32
;
7943 line
->u
.main
.w32
[lineoff
+ 2] = d32
;
7944 line
->u
.main
.w32
[lineoff
+ 3] = d32
;
7948 void VG_REGPARM(2) MC_(helperc_b_store32
)( Addr a
, UWord d32
) {
7949 STATIC_ASSERT(OC_W32S_PER_LINE
== 8);
7953 if (UNLIKELY(a
& 31)) {
7954 /* Handle misaligned case, slowly. */
7955 MC_(helperc_b_store16
)( a
+ 0, d32
);
7956 MC_(helperc_b_store16
)( a
+ 16, d32
);
7960 lineoff
= oc_line_offset(a
);
7961 if (OC_ENABLE_ASSERTIONS
) {
7962 tl_assert(lineoff
== 0);
7965 line
= find_OCacheLine( a
);
7968 line
->u
.main
.descr
[0] = 0;
7969 line
->u
.main
.descr
[1] = 0;
7970 line
->u
.main
.descr
[2] = 0;
7971 line
->u
.main
.descr
[3] = 0;
7972 line
->u
.main
.descr
[4] = 0;
7973 line
->u
.main
.descr
[5] = 0;
7974 line
->u
.main
.descr
[6] = 0;
7975 line
->u
.main
.descr
[7] = 0;
7977 line
->u
.main
.descr
[0] = 0xF;
7978 line
->u
.main
.descr
[1] = 0xF;
7979 line
->u
.main
.descr
[2] = 0xF;
7980 line
->u
.main
.descr
[3] = 0xF;
7981 line
->u
.main
.descr
[4] = 0xF;
7982 line
->u
.main
.descr
[5] = 0xF;
7983 line
->u
.main
.descr
[6] = 0xF;
7984 line
->u
.main
.descr
[7] = 0xF;
7985 line
->u
.main
.w32
[0] = d32
;
7986 line
->u
.main
.w32
[1] = d32
;
7987 line
->u
.main
.w32
[2] = d32
;
7988 line
->u
.main
.w32
[3] = d32
;
7989 line
->u
.main
.w32
[4] = d32
;
7990 line
->u
.main
.w32
[5] = d32
;
7991 line
->u
.main
.w32
[6] = d32
;
7992 line
->u
.main
.w32
[7] = d32
;
7997 /*--------------------------------------------*/
7998 /*--- Origin tracking: sarp handlers ---*/
7999 /*--------------------------------------------*/
8001 // We may get asked to do very large SARPs (bug 446103), hence it is important
8002 // to process 32-byte chunks at a time when possible.
8004 __attribute__((noinline
))
8005 static void ocache_sarp_Set_Origins ( Addr a
, UWord len
, UInt otag
) {
8006 if ((a
& 1) && len
>= 1) {
8007 MC_(helperc_b_store1
)( a
, otag
);
8011 if ((a
& 2) && len
>= 2) {
8012 MC_(helperc_b_store2
)( a
, otag
);
8016 if ((a
& 4) && len
>= 4) {
8017 MC_(helperc_b_store4
)( a
, otag
);
8021 if ((a
& 8) && len
>= 8) {
8022 MC_(helperc_b_store8
)( a
, otag
);
8026 if ((a
& 16) && len
>= 16) {
8027 MC_(helperc_b_store16
)( a
, otag
);
8032 tl_assert(0 == (a
& 31));
8034 MC_(helperc_b_store32
)( a
, otag
);
8040 MC_(helperc_b_store16
)( a
, otag
);
8045 MC_(helperc_b_store8
)( a
, otag
);
8050 MC_(helperc_b_store4
)( a
, otag
);
8055 MC_(helperc_b_store2
)( a
, otag
);
8060 MC_(helperc_b_store1
)( a
, otag
);
8064 tl_assert(len
== 0);
8067 __attribute__((noinline
))
8068 static void ocache_sarp_Clear_Origins ( Addr a
, UWord len
) {
8069 if ((a
& 1) && len
>= 1) {
8070 MC_(helperc_b_store1
)( a
, 0 );
8074 if ((a
& 2) && len
>= 2) {
8075 MC_(helperc_b_store2
)( a
, 0 );
8079 if ((a
& 4) && len
>= 4) {
8080 MC_(helperc_b_store4
)( a
, 0 );
8084 if ((a
& 8) && len
>= 8) {
8085 MC_(helperc_b_store8
)( a
, 0 );
8089 if ((a
& 16) && len
>= 16) {
8090 MC_(helperc_b_store16
)( a
, 0 );
8095 tl_assert(0 == (a
& 31));
8097 MC_(helperc_b_store32
)( a
, 0 );
8103 MC_(helperc_b_store16
)( a
, 0 );
8108 MC_(helperc_b_store8
)( a
, 0 );
8113 MC_(helperc_b_store4
)( a
, 0 );
8118 MC_(helperc_b_store2
)( a
, 0 );
8123 MC_(helperc_b_store1
)( a
, 0 );
8127 tl_assert(len
== 0);
8131 /*------------------------------------------------------------*/
8132 /*--- Setup and finalisation ---*/
8133 /*------------------------------------------------------------*/
/* Second-stage tool initialisation.  It is passed to
   VG_(basic_tool_funcs) by mc_pre_clo_init, which defers the choice of
   new_mem_stack handlers to this function because the command line
   must be parsed first.  Here the option values are inspected in order
   to: adjust/warn about option combinations, register the stack and
   brk "new memory" handlers (with or without origin tags, depending on
   MC_(clo_mc_level)), check the state of the origin caches, create the
   MC_Chunk pool allocator, and enable xtree memory profiling when
   requested. */
8135 static void mc_post_clo_init ( void )
8137 /* If we've been asked to emit XML, mash around various other
8138 options so as to constrain the output somewhat. */
8140 /* Extract as much info as possible from the leak checker. */
8141 MC_(clo_leak_check
) = LC_Full
;
/* Warn when the big-blocks threshold is not below the freelist
   volume. */
8144 if (MC_(clo_freelist_big_blocks
) >= MC_(clo_freelist_vol
)
8145 && VG_(clo_verbosity
) == 1 && !VG_(clo_xml
)) {
8146 VG_(message
)(Vg_UserMsg
,
8147 "Warning: --freelist-big-blocks value %lld has no effect\n"
8148 "as it is >= to --freelist-vol value %lld\n",
8149 MC_(clo_freelist_big_blocks
),
8150 MC_(clo_freelist_vol
));
/* Deprecation notice for --workaround-gcc296-bugs. */
8153 if (MC_(clo_workaround_gcc296_bugs
)
8154 && VG_(clo_verbosity
) == 1 && !VG_(clo_xml
)) {
8156 "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
8157 "Warning: Instead use: --ignore-range-below-sp=1024-1\n"
8162 tl_assert( MC_(clo_mc_level
) >= 1 && MC_(clo_mc_level
) <= 3 );
/* Level 3 selects the "_w_ECU" stack handlers; any other level
   selects the plain ones. */
8164 if (MC_(clo_mc_level
) == 3) {
8165 /* We're doing origin tracking. */
8166 # ifdef PERF_FAST_STACK
8167 VG_(track_new_mem_stack_4_w_ECU
) ( mc_new_mem_stack_4_w_ECU
);
8168 VG_(track_new_mem_stack_8_w_ECU
) ( mc_new_mem_stack_8_w_ECU
);
8169 VG_(track_new_mem_stack_12_w_ECU
) ( mc_new_mem_stack_12_w_ECU
);
8170 VG_(track_new_mem_stack_16_w_ECU
) ( mc_new_mem_stack_16_w_ECU
);
8171 VG_(track_new_mem_stack_32_w_ECU
) ( mc_new_mem_stack_32_w_ECU
);
8172 VG_(track_new_mem_stack_112_w_ECU
) ( mc_new_mem_stack_112_w_ECU
);
8173 VG_(track_new_mem_stack_128_w_ECU
) ( mc_new_mem_stack_128_w_ECU
);
8174 VG_(track_new_mem_stack_144_w_ECU
) ( mc_new_mem_stack_144_w_ECU
);
8175 VG_(track_new_mem_stack_160_w_ECU
) ( mc_new_mem_stack_160_w_ECU
);
8177 VG_(track_new_mem_stack_w_ECU
) ( mc_new_mem_stack_w_ECU
);
8178 VG_(track_new_mem_stack_signal
) ( mc_new_mem_w_tid_make_ECU
);
8180 /* Not doing origin tracking */
8181 # ifdef PERF_FAST_STACK
8182 VG_(track_new_mem_stack_4
) ( mc_new_mem_stack_4
);
8183 VG_(track_new_mem_stack_8
) ( mc_new_mem_stack_8
);
8184 VG_(track_new_mem_stack_12
) ( mc_new_mem_stack_12
);
8185 VG_(track_new_mem_stack_16
) ( mc_new_mem_stack_16
);
8186 VG_(track_new_mem_stack_32
) ( mc_new_mem_stack_32
);
8187 VG_(track_new_mem_stack_112
) ( mc_new_mem_stack_112
);
8188 VG_(track_new_mem_stack_128
) ( mc_new_mem_stack_128
);
8189 VG_(track_new_mem_stack_144
) ( mc_new_mem_stack_144
);
8190 VG_(track_new_mem_stack_160
) ( mc_new_mem_stack_160
);
8192 VG_(track_new_mem_stack
) ( mc_new_mem_stack
);
8193 VG_(track_new_mem_stack_signal
) ( mc_new_mem_w_tid_no_ECU
);
8196 // We assume that brk()/sbrk() does not initialise new memory. Is this
8197 // accurate? John Reiser says:
8199 // 0) sbrk() can *decrease* process address space. No zero fill is done
8200 // for a decrease, not even the fragment on the high end of the last page
8201 // that is beyond the new highest address. For maximum safety and
8202 // portability, then the bytes in the last page that reside above [the
8203 // new] sbrk(0) should be considered to be uninitialized, but in practice
8204 // it is exceedingly likely that they will retain their previous
8207 // 1) If an increase is large enough to require new whole pages, then
8208 // those new whole pages (like all new pages) are zero-filled by the
8209 // operating system. So if sbrk(0) already is page aligned, then
8210 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
8212 // 2) Any increase that lies within an existing allocated page is not
8213 // changed. So if (x = sbrk(0)) is not page aligned, then
8214 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
8215 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
8216 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
8217 // of them come along for the ride because the operating system deals
8218 // only in whole pages. Again, for maximum safety and portability, then
8219 // anything that lives above [the new] sbrk(0) should be considered
8220 // uninitialized, but in practice will retain previous contents [zero in
8225 // A key property of sbrk/brk is that new whole pages that are supplied
8226 // by the operating system *do* get initialized to zero.
8228 // As for the portability of all this:
8230 // sbrk and brk are not POSIX. However, any system that is a derivative
8231 // of *nix has sbrk and brk because there are too many software (such as
8232 // the Bourne shell) which rely on the traditional memory map (.text,
8233 // .data+.bss, stack) and the existence of sbrk/brk.
8235 // So we should arguably observe all this. However:
8236 // - The current inaccuracy has caused maybe one complaint in seven years(?)
8237 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
8238 // doubt most programmers know the above information.
8239 // So I'm not terribly unhappy with marking it as undefined. --njn.
8241 // [More: I think most of what John said only applies to sbrk(). It seems
8242 // that brk() always deals in whole pages. And since this event deals
8243 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
8244 // just mark all memory it allocates as defined.]
/* brk handler: on non-Solaris targets it follows the origin-tracking
   choice made above. */
8246 # if !defined(VGO_solaris)
8247 if (MC_(clo_mc_level
) == 3)
8248 VG_(track_new_mem_brk
) ( mc_new_mem_w_tid_make_ECU
);
8250 VG_(track_new_mem_brk
) ( mc_new_mem_w_tid_no_ECU
);
8252 // On Solaris, brk memory has to be marked as defined, otherwise we get
8253 // many false positives.
8254 VG_(track_new_mem_brk
) ( make_mem_defined_w_tid
);
8257 /* This origin tracking cache is huge (~100M), so only initialise
/* With origin tracking every one of the 4096 ocachesL2 slots must be
   non-NULL at this point; without it, all of them must still be
   NULL. */
8259 if (MC_(clo_mc_level
) >= 3) {
8261 tl_assert(ocacheL1
!= NULL
);
8262 for (UInt i
= 0; i
< 4096; i
++ ) {
8263 tl_assert(ocachesL2
[i
] != NULL
);
8266 tl_assert(ocacheL1
== NULL
);
8267 for (UInt i
= 0; i
< 4096; i
++ ) {
8268 tl_assert(ocachesL2
[i
] == NULL
);
/* Pool element size: one MC_Chunk plus one ExeContext pointer per
   "where" slot, as reported by MC_(n_where_pointers)(). */
8272 MC_(chunk_poolalloc
) = VG_(newPA
)
8273 (sizeof(MC_Chunk
) + MC_(n_where_pointers
)() * sizeof(ExeContext
*),
8276 "mc.cMC.1 (MC_Chunk pools)",
8279 /* Do not check definedness of guest state if --undef-value-errors=no */
8280 if (MC_(clo_mc_level
) >= 2)
8281 VG_(track_pre_reg_read
) ( mc_pre_reg_read
);
/* --xtree-memory=full is rejected when the kept stacktraces are
   KS_none or KS_free. */
8283 if (VG_(clo_xtree_memory
) == Vg_XTMemory_Full
) {
8284 if (MC_(clo_keep_stacktraces
) == KS_none
8285 || MC_(clo_keep_stacktraces
) == KS_free
)
8286 VG_(fmsg_bad_option
)("--keep-stacktraces",
8287 "To use --xtree-memory=full, you must"
8288 " keep at least the alloc stacktrace\n");
8289 // Activate full xtree memory profiling.
8290 VG_(XTMemory_Full_init
)(VG_(XT_filter_1top_and_maybe_below_main
));
8295 static void print_SM_info(const HChar
* type
, Int n_SMs
)
8297 VG_(message
)(Vg_DebugMsg
,
8298 " memcheck: SMs: %s = %d (%luk, %luM)\n",
8301 n_SMs
* sizeof(SecMap
) / 1024UL,
8302 n_SMs
* sizeof(SecMap
) / (1024 * 1024UL) );
8305 static void mc_print_stats (void)
8307 SizeT max_secVBit_szB
, max_SMs_szB
, max_shmem_szB
;
8309 VG_(message
)(Vg_DebugMsg
, " memcheck: freelist: vol %lld length %lld\n",
8310 VG_(free_queue_volume
), VG_(free_queue_length
));
8311 VG_(message
)(Vg_DebugMsg
,
8312 " memcheck: sanity checks: %d cheap, %d expensive\n",
8313 n_sanity_cheap
, n_sanity_expensive
);
8314 VG_(message
)(Vg_DebugMsg
,
8315 " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
8317 n_auxmap_L2_nodes
* 64,
8318 n_auxmap_L2_nodes
/ 16 );
8319 VG_(message
)(Vg_DebugMsg
,
8320 " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
8321 n_auxmap_L1_searches
, n_auxmap_L1_cmps
,
8322 (10ULL * n_auxmap_L1_cmps
)
8323 / (n_auxmap_L1_searches
? n_auxmap_L1_searches
: 1)
8325 VG_(message
)(Vg_DebugMsg
,
8326 " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
8327 n_auxmap_L2_searches
, n_auxmap_L2_nodes
8330 print_SM_info("n_issued ", n_issued_SMs
);
8331 print_SM_info("n_deissued ", n_deissued_SMs
);
8332 print_SM_info("max_noaccess ", max_noaccess_SMs
);
8333 print_SM_info("max_undefined", max_undefined_SMs
);
8334 print_SM_info("max_defined ", max_defined_SMs
);
8335 print_SM_info("max_non_DSM ", max_non_DSM_SMs
);
8337 // Three DSMs, plus the non-DSM ones
8338 max_SMs_szB
= (3 + max_non_DSM_SMs
) * sizeof(SecMap
);
8339 // The 3*sizeof(Word) bytes is the AVL node metadata size.
8340 // The VG_ROUNDUP is because the OSet pool allocator will/must align
8341 // the elements on pointer size.
8342 // Note that the pool allocator has some additional small overhead
8343 // which is not counted in the below.
8344 // Hardwiring this logic sucks, but I don't see how else to do it.
8345 max_secVBit_szB
= max_secVBit_nodes
*
8346 (3*sizeof(Word
) + VG_ROUNDUP(sizeof(SecVBitNode
), sizeof(void*)));
8347 max_shmem_szB
= sizeof(primary_map
) + max_SMs_szB
+ max_secVBit_szB
;
8349 VG_(message
)(Vg_DebugMsg
,
8350 " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
8351 max_secVBit_nodes
, max_secVBit_szB
/ 1024,
8352 max_secVBit_szB
/ (1024 * 1024));
8353 VG_(message
)(Vg_DebugMsg
,
8354 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
8355 sec_vbits_new_nodes
+ sec_vbits_updates
,
8356 sec_vbits_new_nodes
, sec_vbits_updates
);
8357 VG_(message
)(Vg_DebugMsg
,
8358 " memcheck: max shadow mem size: %luk, %luM\n",
8359 max_shmem_szB
/ 1024, max_shmem_szB
/ (1024 * 1024));
8361 if (MC_(clo_mc_level
) >= 3) {
8362 VG_(message
)(Vg_DebugMsg
,
8363 " ocacheL1: %'14lu refs %'14lu misses (%'lu lossage)\n",
8364 stats_ocacheL1_find
,
8365 stats_ocacheL1_misses
,
8366 stats_ocacheL1_lossage
);
8367 VG_(message
)(Vg_DebugMsg
,
8368 " ocacheL1: %'14lu at 0 %'14lu at 1\n",
8369 stats_ocacheL1_find
- stats_ocacheL1_misses
8370 - stats_ocacheL1_found_at_1
8371 - stats_ocacheL1_found_at_N
,
8372 stats_ocacheL1_found_at_1
);
8373 VG_(message
)(Vg_DebugMsg
,
8374 " ocacheL1: %'14lu at 2+ %'14lu move-fwds\n",
8375 stats_ocacheL1_found_at_N
,
8376 stats_ocacheL1_movefwds
);
8377 VG_(message
)(Vg_DebugMsg
,
8378 " ocacheL1: %'14lu sizeB %'14d useful\n",
8379 (SizeT
)sizeof(OCache
),
8380 4 * OC_W32S_PER_LINE
* OC_LINES_PER_SET
* OC_N_SETS
);
8381 VG_(message
)(Vg_DebugMsg
,
8382 " ocacheL2: %'14lu finds %'14lu misses\n",
8383 stats__ocacheL2_finds
,
8384 stats__ocacheL2_misses
);
8385 VG_(message
)(Vg_DebugMsg
,
8386 " ocacheL2: %'14lu adds %'14lu dels\n",
8387 stats__ocacheL2_adds
,
8388 stats__ocacheL2_dels
);
8389 VG_(message
)(Vg_DebugMsg
,
8390 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
8391 stats__ocacheL2_n_nodes_max
,
8392 stats__ocacheL2_n_nodes
);
8393 VG_(message
)(Vg_DebugMsg
,
8394 " niacache: %'12lu refs %'12lu misses\n",
8395 stats__nia_cache_queries
, stats__nia_cache_misses
);
8397 tl_assert(ocacheL1
== NULL
);
8398 for (UInt i
= 0; i
< 4096; i
++ ) {
8399 tl_assert(ocachesL2
[1] == NULL
);
/* Tool finalisation hook.  In the code visible here it: emits the
   xtree memory report, prints malloc statistics, runs a final leak
   check when leak checking is not off (otherwise possibly prints a
   hint about --leak-check=full), suggests --track-origins=yes when
   value errors were seen at level 2, and warns about client-requested
   ignore ranges that are still in force.  |exitcode| is not referenced
   in the visible code. */
8405 static void mc_fini ( Int exitcode
)
8407 MC_(xtmemory_report
) (VG_(clo_xtree_memory_file
), True
);
8408 MC_(print_malloc_stats
)();
/* Final leak check: parameters come from the command-line options. */
8410 if (MC_(clo_leak_check
) != LC_Off
) {
8411 LeakCheckParams lcp
;
8412 HChar
* xt_filename
= NULL
;
8413 lcp
.mode
= MC_(clo_leak_check
);
8414 lcp
.show_leak_kinds
= MC_(clo_show_leak_kinds
);
8415 lcp
.heuristics
= MC_(clo_leak_check_heuristics
);
8416 lcp
.errors_for_leak_kinds
= MC_(clo_error_for_leak_kinds
);
8417 lcp
.deltamode
= LCD_Any
;
8418 lcp
.max_loss_records_output
= 999999999;
8419 lcp
.requested_by_monitor_command
= False
;
/* With --xtree-leak, the output file name is expanded here (and
   freed after the leak check below) and all reachedness kinds are
   shown. */
8420 if (MC_(clo_xtree_leak
)) {
8421 xt_filename
= VG_(expand_file_name
)("--xtree-leak-file",
8422 MC_(clo_xtree_leak_file
));
8423 lcp
.xt_filename
= xt_filename
;
8425 lcp
.show_leak_kinds
= MC_(all_Reachedness
)();
8428 lcp
.xt_filename
= NULL
;
8429 MC_(detect_memory_leaks
)(1/*bogus ThreadId*/, &lcp
);
8430 if (MC_(clo_xtree_leak
))
8431 VG_(free
)(xt_filename
);
8433 if (VG_(clo_verbosity
) == 1 && !VG_(clo_xml
)) {
8435 "For a detailed leak analysis, rerun with: --leak-check=full\n"
/* Hint at origin tracking only when value errors occurred and the
   tool ran at level 2. */
8441 if (MC_(any_value_errors
) && !VG_(clo_xml
) && VG_(clo_verbosity
) >= 1
8442 && MC_(clo_mc_level
) == 2) {
8443 VG_(message
)(Vg_UserMsg
,
8444 "Use --track-origins=yes to see where "
8445 "uninitialised values come from\n");
8448 /* Print a warning if any client-request generated ignore-ranges
8449 still exist. It would be reasonable to expect that a properly
8450 written program would remove any such ranges before exiting, and
8451 since they are a bit on the dangerous side, let's comment. By
8452 contrast ranges which are specified on the command line normally
8453 pertain to hardware mapped into the address space, and so we
8454 can't expect the client to have got rid of them. */
8455 if (gIgnoredAddressRanges
) {
/* Walk every entry of the range map; only entries whose value is
   IAR_ClientReq are of interest. */
8457 for (i
= 0; i
< VG_(sizeRangeMap
)(gIgnoredAddressRanges
); i
++) {
8458 UWord val
= IAR_INVALID
;
8459 UWord key_min
= ~(UWord
)0;
8460 UWord key_max
= (UWord
)0;
8461 VG_(indexRangeMap
)( &key_min
, &key_max
, &val
,
8462 gIgnoredAddressRanges
, i
);
8463 if (val
!= IAR_ClientReq
)
8465 /* Print the offending range. Also, if it is the first,
8466 print a banner before it. */
8470 "WARNING: exiting program has the following client-requested\n"
8471 "WARNING: address error disablement range(s) still in force,\n"
8473 "possibly as a result of some mistake in the use of the\n"
8475 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
8478 VG_(umsg
)(" [%u] 0x%016lx-0x%016lx %s\n",
8479 i
, key_min
, key_max
, showIARKind(val
));
/* NOTE(review): the condition guarding the client block stats below
   is not visible in this view -- confirm against the full file. */
8489 VG_(message
)(Vg_DebugMsg
,
8490 "------ Valgrind's client block stats follow ---------------\n" );
8491 show_client_block_stats();
8495 /* mark the given addr/len unaddressable for watchpoint implementation
8496 The PointKind will be handled at access time */
8497 static Bool
mc_mark_unaddressable_for_watchpoint (PointKind kind
, Bool insert
,
8498 Addr addr
, SizeT len
)
8500 /* GDBTD this is somewhat fishy. We might rather have to save the previous
8501 accessibility and definedness in gdbserver so as to allow restoring it
8502 properly. Currently, we assume that the user only watches things
8503 which are properly addressable and defined */
8505 MC_(make_mem_noaccess
) (addr
, len
);
8507 MC_(make_mem_defined
) (addr
, len
);
/* First-stage tool initialisation (this is the function named in
   VG_DETERMINE_INTERFACE_VERSION at the end of the file).  It
   declares the tool's details, the services ("needs") Memcheck
   requires, and the memory/register event callbacks; initialises
   shadow memory, the malloc and mempool hash tables and the
   nia-to-ecu cache; and asserts a number of size and mask invariants.
   Handlers whose choice depends on command-line options are left to
   mc_post_clo_init. */
8511 static void mc_pre_clo_init(void)
/* Tool identity. */
8513 VG_(details_name
) ("Memcheck");
8514 VG_(details_version
) (NULL
);
8515 VG_(details_description
) ("a memory error detector");
8516 VG_(details_copyright_author
)(
8517 "Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.");
8518 VG_(details_bug_reports_to
) (VG_BUGS_TO
);
8519 VG_(details_avg_translation_sizeB
) ( 640 );
8521 VG_(basic_tool_funcs
) (mc_post_clo_init
,
8525 VG_(needs_final_IR_tidy_pass
) ( MC_(final_tidy
) );
/* Services required by the tool. */
8528 VG_(needs_core_errors
) ();
8529 VG_(needs_tool_errors
) (MC_(eq_Error
),
8530 MC_(before_pp_Error
),
8532 True
,/*show TIDs for errors*/
8533 MC_(update_Error_extra
),
8534 MC_(is_recognised_suppression
),
8535 MC_(read_extra_suppression_info
),
8536 MC_(error_matches_suppression
),
8537 MC_(get_error_name
),
8538 MC_(get_extra_suppression_info
),
8539 MC_(print_extra_suppression_use
),
8540 MC_(update_extra_suppression_use
));
8541 VG_(needs_libc_freeres
) ();
8542 VG_(needs_cxx_freeres
) ();
8543 VG_(needs_command_line_options
)(mc_process_cmd_line_options
,
8545 mc_print_debug_usage
);
8546 VG_(needs_client_requests
) (mc_handle_client_request
);
8547 VG_(needs_sanity_checks
) (mc_cheap_sanity_check
,
8548 mc_expensive_sanity_check
);
8549 VG_(needs_print_stats
) (mc_print_stats
);
8550 VG_(needs_info_location
) (MC_(pp_describe_addr
));
8551 VG_(needs_malloc_replacement
) (MC_(malloc
),
8553 MC_(__builtin_new_aligned
),
8554 MC_(__builtin_vec_new
),
8555 MC_(__builtin_vec_new_aligned
),
8559 MC_(__builtin_delete
),
8560 MC_(__builtin_delete_aligned
),
8561 MC_(__builtin_vec_delete
),
8562 MC_(__builtin_vec_delete_aligned
),
8564 MC_(malloc_usable_size
),
8565 MC_MALLOC_DEFAULT_REDZONE_SZB
);
/* Record the redzone size reported by the core after the replacement
   allocator has been registered. */
8566 MC_(Malloc_Redzone_SzB
) = VG_(malloc_effective_client_redzone_size
)();
8568 VG_(needs_xml_output
) ();
/* Memory event callbacks. */
8570 VG_(track_new_mem_startup
) ( mc_new_mem_startup
);
8572 // Handling of mmap and mprotect isn't simple (well, it is simple,
8573 // but the justification isn't.) See comments above, just prior to
8575 VG_(track_new_mem_mmap
) ( mc_new_mem_mmap
);
8576 VG_(track_change_mem_mprotect
) ( mc_new_mem_mprotect
);
8578 VG_(track_copy_mem_remap
) ( MC_(copy_address_range_state
) );
8580 VG_(track_die_mem_stack_signal
)( MC_(make_mem_noaccess
) );
8581 VG_(track_die_mem_brk
) ( MC_(make_mem_noaccess
) );
8582 VG_(track_die_mem_munmap
) ( MC_(make_mem_noaccess
) );
8584 /* Defer the specification of the new_mem_stack functions to the
8585 post_clo_init function, since we need to first parse the command
8586 line before deciding which set to use. */
8588 # ifdef PERF_FAST_STACK
8589 VG_(track_die_mem_stack_4
) ( mc_die_mem_stack_4
);
8590 VG_(track_die_mem_stack_8
) ( mc_die_mem_stack_8
);
8591 VG_(track_die_mem_stack_12
) ( mc_die_mem_stack_12
);
8592 VG_(track_die_mem_stack_16
) ( mc_die_mem_stack_16
);
8593 VG_(track_die_mem_stack_32
) ( mc_die_mem_stack_32
);
8594 VG_(track_die_mem_stack_112
) ( mc_die_mem_stack_112
);
8595 VG_(track_die_mem_stack_128
) ( mc_die_mem_stack_128
);
8596 VG_(track_die_mem_stack_144
) ( mc_die_mem_stack_144
);
8597 VG_(track_die_mem_stack_160
) ( mc_die_mem_stack_160
);
8599 VG_(track_die_mem_stack
) ( mc_die_mem_stack
);
8601 VG_(track_ban_mem_stack
) ( MC_(make_mem_noaccess
) );
8603 VG_(track_pre_mem_read
) ( check_mem_is_defined
);
8604 VG_(track_pre_mem_read_asciiz
) ( check_mem_is_defined_asciiz
);
8605 VG_(track_pre_mem_write
) ( check_mem_is_addressable
);
8606 VG_(track_post_mem_write
) ( mc_post_mem_write
);
8608 VG_(track_post_reg_write
) ( mc_post_reg_write
);
8609 VG_(track_post_reg_write_clientcall_return
)( mc_post_reg_write_clientcall
);
/* NOTE(review): this tests MC_(clo_mc_level) in the pre-option init
   function; whether the option has its final value at this point is
   not visible here -- confirm. */
8611 if (MC_(clo_mc_level
) >= 2) {
8612 VG_(track_copy_mem_to_reg
) ( mc_copy_mem_to_reg
);
8613 VG_(track_copy_reg_to_mem
) ( mc_copy_reg_to_mem
);
8616 VG_(needs_watchpoint
) ( mc_mark_unaddressable_for_watchpoint
);
/* Tool-internal state. */
8618 init_shadow_memory();
8619 // MC_(chunk_poolalloc) must be allocated in post_clo_init
8620 tl_assert(MC_(chunk_poolalloc
) == NULL
);
8621 MC_(malloc_list
) = VG_(HT_construct
)( "MC_(malloc_list)" );
8622 MC_(mempool_list
) = VG_(HT_construct
)( "MC_(mempool_list)" );
8625 tl_assert( mc_expensive_sanity_check() );
8627 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
8628 tl_assert(sizeof(UWord
) == sizeof(Addr
));
8629 // Call me paranoid. I don't care.
8630 tl_assert(sizeof(void*) == sizeof(Addr
));
8632 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
8633 tl_assert(-1 != VG_(log2
)(BYTES_PER_SEC_VBIT_NODE
));
8635 /* This is small. Always initialise it. */
8636 init_nia_to_ecu_cache();
8638 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
8639 if we need to, since the command line args haven't been
8640 processed yet. Hence defer it to mc_post_clo_init. */
8641 tl_assert(ocacheL1
== NULL
);
8642 for (UInt i
= 0; i
< 4096; i
++ ) {
8643 tl_assert(ocachesL2
[i
] == NULL
);
8646 /* Check some important stuff. See extensive comments above
8647 re UNALIGNED_OR_HIGH for background. */
8648 # if VG_WORDSIZE == 4
8649 tl_assert(sizeof(void*) == 4);
8650 tl_assert(sizeof(Addr
) == 4);
8651 tl_assert(sizeof(UWord
) == 4);
8652 tl_assert(sizeof(Word
) == 4);
8653 tl_assert(MAX_PRIMARY_ADDRESS
== 0xFFFFFFFFUL
);
8654 tl_assert(MASK(1) == 0UL);
8655 tl_assert(MASK(2) == 1UL);
8656 tl_assert(MASK(4) == 3UL);
8657 tl_assert(MASK(8) == 7UL);
/* The checks that follow are the 8-byte word size counterparts of the
   ones above. */
8659 tl_assert(VG_WORDSIZE
== 8);
8660 tl_assert(sizeof(void*) == 8);
8661 tl_assert(sizeof(Addr
) == 8);
8662 tl_assert(sizeof(UWord
) == 8);
8663 tl_assert(sizeof(Word
) == 8);
8664 tl_assert(MAX_PRIMARY_ADDRESS
== 0x1FFFFFFFFFULL
);
8665 tl_assert(MASK(1) == 0xFFFFFFE000000000ULL
);
8666 tl_assert(MASK(2) == 0xFFFFFFE000000001ULL
);
8667 tl_assert(MASK(4) == 0xFFFFFFE000000003ULL
);
8668 tl_assert(MASK(8) == 0xFFFFFFE000000007ULL
);
8671 /* Check some assertions to do with the instrumentation machinery. */
8672 MC_(do_instrumentation_startup_checks
)();
/* Compile-time check that UWord and SizeT have the same size. */
8675 STATIC_ASSERT(sizeof(UWord
) == sizeof(SizeT
));
/* Names mc_pre_clo_init as the tool's pre-command-line-option
   initialisation function. */
8677 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init
)
8679 /*--------------------------------------------------------------------*/
8680 /*--- end mc_main.c ---*/
8681 /*--------------------------------------------------------------------*/