1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
5 /*--- accessibility (A) and validity (V) status of each byte. ---*/
7 /*--------------------------------------------------------------------*/
10 This file is part of MemCheck, a heavyweight Valgrind tool for
11 detecting memory errors.
13 Copyright (C) 2000-2017 Julian Seward
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, write to the Free Software
28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
31 The GNU General Public License is contained in the file COPYING.
34 #include "pub_tool_basics.h"
35 #include "pub_tool_aspacemgr.h"
36 #include "pub_tool_gdbserver.h"
37 #include "pub_tool_poolalloc.h"
38 #include "pub_tool_hashtable.h" // For mc_include.h
39 #include "pub_tool_libcbase.h"
40 #include "pub_tool_libcassert.h"
41 #include "pub_tool_libcprint.h"
42 #include "pub_tool_machine.h"
43 #include "pub_tool_mallocfree.h"
44 #include "pub_tool_options.h"
45 #include "pub_tool_oset.h"
46 #include "pub_tool_rangemap.h"
47 #include "pub_tool_replacemalloc.h"
48 #include "pub_tool_tooliface.h"
49 #include "pub_tool_threadstate.h"
50 #include "pub_tool_xarray.h"
51 #include "pub_tool_xtree.h"
52 #include "pub_tool_xtmemory.h"
54 #include "mc_include.h"
55 #include "memcheck.h" /* for client requests */
57 /* Set to 1 to do a little more sanity checking */
58 #define VG_DEBUG_MEMORY 0
60 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
62 static void ocache_sarp_Set_Origins ( Addr
, UWord
, UInt
); /* fwds */
63 static void ocache_sarp_Clear_Origins ( Addr
, UWord
); /* fwds */
66 /*------------------------------------------------------------*/
67 /*--- Fast-case knobs ---*/
68 /*------------------------------------------------------------*/
70 // Comment these out to disable the fast cases (don't just set them to zero).
72 /* PERF_FAST_LOADV is in mc_include.h */
73 #define PERF_FAST_STOREV 1
75 #define PERF_FAST_SARP 1
77 #define PERF_FAST_STACK 1
78 #define PERF_FAST_STACK2 1
80 /* Change this to 1 to enable assertions on origin tracking cache fast
82 #define OC_ENABLE_ASSERTIONS 0
85 /*------------------------------------------------------------*/
86 /*--- Comments on the origin tracking implementation ---*/
87 /*------------------------------------------------------------*/
89 /* See detailed comment entitled
90 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
91 which is contained further on in this file. */
94 /*------------------------------------------------------------*/
95 /*--- V bits and A bits ---*/
96 /*------------------------------------------------------------*/
98 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
99 thinks the corresponding value bit is defined. And every memory byte
100 has an A bit, which tracks whether Memcheck thinks the program can access
101 it safely (ie. it's mapped, and has at least one of the RWX permission bits
102 set). So every N-bit register is shadowed with N V bits, and every memory
103 byte is shadowed with 8 V bits and one A bit.
105 In the implementation, we use two forms of compression (compressed V bits
106 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
109 Memcheck also tracks extra information about each heap block that is
110 allocated, for detecting memory leaks and other purposes.
113 /*------------------------------------------------------------*/
114 /*--- Basic A/V bitmap representation. ---*/
115 /*------------------------------------------------------------*/
117 /* All reads and writes are checked against a memory map (a.k.a. shadow
118 memory), which records the state of all memory in the process.
120 On 32-bit machines the memory map is organised as follows.
121 The top 16 bits of an address are used to index into a top-level
122 map table, containing 65536 entries. Each entry is a pointer to a
123 second-level map, which records the accessibility and validity
124 permissions for the 65536 bytes indexed by the lower 16 bits of the
125 address. Each byte is represented by two bits (details are below). So
126 each second-level map contains 16384 bytes. This two-level arrangement
127 conveniently divides the 4G address space into 64k lumps, each size 64k
130 All entries in the primary (top-level) map must point to a valid
131 secondary (second-level) map. Since many of the 64kB chunks will
132 have the same status for every bit -- ie. noaccess (for unused
133 address space) or entirely addressable and defined (for code segments) --
134 there are three distinguished secondary maps, which indicate 'noaccess',
135 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
136 map entry points to the relevant distinguished map. In practice,
137 typically more than half of the addressable memory is represented with
138 the 'undefined' or 'defined' distinguished secondary map, so it gives a
139 good saving. It also lets us set the V+A bits of large address regions
140 quickly in set_address_range_perms().
142 On 64-bit machines it's more complicated. If we followed the same basic
143 scheme we'd have a four-level table which would require too many memory
144 accesses. So instead the top-level map table has 2^20 entries (indexed
145 using bits 16..35 of the address); this covers the bottom 64GB. Any
146 accesses above 64GB are handled with a slow, sparse auxiliary table.
147 Valgrind's address space manager tries very hard to keep things below
148 this 64GB barrier so that performance doesn't suffer too much.
150 Note that this file has a lot of different functions for reading and
151 writing shadow memory. Only a couple are strictly necessary (eg.
152 get_vabits2 and set_vabits2), most are just specialised for specific
153 common cases to improve performance.
155 Aside: the V+A bits are less precise than they could be -- we have no way
156 of marking memory as read-only. It would be great if we could add an
157 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
158 which requires 2.3 bits to hold, and there's no way to do that elegantly
159 -- we'd have to double up to 4 bits of metadata per byte, which doesn't
163 /* --------------- Basic configuration --------------- */
165 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. */
169 /* cover the entire address space */
170 # define N_PRIMARY_BITS 16
174 /* Just handle the first 128G fast and the rest via auxiliary
175 primaries. If you change this, Memcheck will assert at startup.
176 See the definition of UNALIGNED_OR_HIGH for extensive comments. */
177 # define N_PRIMARY_BITS 21
182 /* Do not change this. */
183 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS)
185 /* Do not change this. */
186 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
189 /* --------------- Secondary maps --------------- */
191 // Each byte of memory conceptually has an A bit, which indicates its
192 // addressability, and 8 V bits, which indicates its definedness.
194 // But because very few bytes are partially defined, we can use a nice
195 // compression scheme to reduce the size of shadow memory. Each byte of
196 // memory has 2 bits which indicates its state (ie. V+A bits):
198 // 00: noaccess (unaddressable but treated as fully defined)
199 // 01: undefined (addressable and fully undefined)
200 // 10: defined (addressable and fully defined)
201 // 11: partdefined (addressable and partially defined)
203 // In the "partdefined" case, we use a secondary table to store the V bits.
204 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
207 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
208 // four bytes (32 bits) of memory are in each chunk. Hence the name
209 // "vabits8". This lets us get the V+A bits for four bytes at a time
210 // easily (without having to do any shifting and/or masking), and that is a
211 // very common operation. (Note that although each vabits8 chunk
212 // is 8 bits in size, it represents 32 bits of memory.)
214 // The representation is "inverse" little-endian... each 4 bytes of
215 // memory is represented by a 1 byte value, where:
217 // - the status of byte (a+0) is held in bits [1..0]
218 // - the status of byte (a+1) is held in bits [3..2]
219 // - the status of byte (a+2) is held in bits [5..4]
220 // - the status of byte (a+3) is held in bits [7..6]
222 // It's "inverse" because endianness normally describes a mapping from
223 // value bits to memory addresses; in this case the mapping is inverted.
224 // Ie. instead of particular value bits being held in certain addresses, in
225 // this case certain addresses are represented by particular value bits.
226 // See insert_vabits2_into_vabits8() for an example.
228 // But note that we don't compress the V bits stored in registers; they
229 // need to be explicit to make the shadow operations possible.  Therefore
230 // when moving values between registers and memory we need to convert
231 // between the expanded in-register format and the compressed in-memory
232 // format. This isn't so difficult, it just requires careful attention in a
235 // These represent eight bits of memory.
236 #define VA_BITS2_NOACCESS 0x0 // 00b
237 #define VA_BITS2_UNDEFINED 0x1 // 01b
238 #define VA_BITS2_DEFINED 0x2 // 10b
239 #define VA_BITS2_PARTDEFINED 0x3 // 11b
241 // These represent 16 bits of memory.
242 #define VA_BITS4_NOACCESS 0x0 // 00_00b
243 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
244 #define VA_BITS4_DEFINED 0xa // 10_10b
246 // These represent 32 bits of memory.
247 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
248 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
249 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
251 // These represent 64 bits of memory.
252 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
253 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
254 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
256 // These represent 128 bits of memory.
257 #define VA_BITS32_UNDEFINED 0x55555555 // 01_01_01_01b x 4
260 #define SM_CHUNKS 16384 // Each SM covers 64k of memory.
261 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
262 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3)
264 // Paranoia: it's critical for performance that the requested inlining
265 // occurs. So try extra hard.
266 #define INLINE inline __attribute__((always_inline))
268 static INLINE Addr
start_of_this_sm ( Addr a
) {
269 return (a
& (~SM_MASK
));
271 static INLINE Bool
is_start_of_sm ( Addr a
) {
272 return (start_of_this_sm(a
) == a
);
/* NOTE(review): extraction artifact -- this span is the interior of
   the SecMap shadow-storage declaration: a compile-time check that
   SM_CHUNKS is even, then the same storage viewed either as
   SM_CHUNKS bytes (vabits8) or as SM_CHUNKS/2 16-bit units
   (vabits16).  The enclosing struct/union header is not visible in
   this extract -- confirm against the full file. */
275 STATIC_ASSERT(SM_CHUNKS
% 2 == 0);
279 UChar vabits8
[SM_CHUNKS
];
280 UShort vabits16
[SM_CHUNKS
/2];
284 // 3 distinguished secondary maps, one for no-access, one for
285 // accessible but undefined, and one for accessible and defined.
286 // Distinguished secondaries may never be modified.
287 #define SM_DIST_NOACCESS 0
288 #define SM_DIST_UNDEFINED 1
289 #define SM_DIST_DEFINED 2
291 static SecMap sm_distinguished
[3];
293 static INLINE Bool
is_distinguished_sm ( SecMap
* sm
) {
294 return sm
>= &sm_distinguished
[0] && sm
<= &sm_distinguished
[2];
297 // Forward declaration
298 static void update_SM_counts(SecMap
* oldSM
, SecMap
* newSM
);
300 /* dist_sm points to one of our three distinguished secondaries. Make
301 a copy of it so that we can write to it.
/* Make a private, writable copy of one of the three distinguished
   secondaries (which are shared and must never be modified), so the
   caller can install and write to the copy.
   NOTE(review): this extract is missing several original lines --
   the declaration of new_sm, the out-of-memory size argument, and
   the final return of new_sm -- verify against the full file. */
303 static SecMap
* copy_for_writing ( SecMap
* dist_sm
)
/* The argument must be one of the three distinguished maps. */
306 tl_assert(dist_sm
== &sm_distinguished
[0]
307 || dist_sm
== &sm_distinguished
[1]
308 || dist_sm
== &sm_distinguished
[2]);
/* Allocate the copy from the address-space manager's shadow pool. */
310 new_sm
= VG_(am_shadow_alloc
)(sizeof(SecMap
));
312 VG_(out_of_memory_NORETURN
)( "memcheck:allocate new SecMap",
/* Seed the copy with the distinguished map's contents. */
314 VG_(memcpy
)(new_sm
, dist_sm
, sizeof(SecMap
));
315 update_SM_counts(dist_sm
, new_sm
);
319 /* --------------- Stats --------------- */
321 static Int n_issued_SMs
= 0;
322 static Int n_deissued_SMs
= 0;
323 static Int n_noaccess_SMs
= N_PRIMARY_MAP
; // start with many noaccess DSMs
324 static Int n_undefined_SMs
= 0;
325 static Int n_defined_SMs
= 0;
326 static Int n_non_DSM_SMs
= 0;
327 static Int max_noaccess_SMs
= 0;
328 static Int max_undefined_SMs
= 0;
329 static Int max_defined_SMs
= 0;
330 static Int max_non_DSM_SMs
= 0;
332 /* # searches initiated in auxmap_L1, and # base cmps required */
333 static ULong n_auxmap_L1_searches
= 0;
334 static ULong n_auxmap_L1_cmps
= 0;
335 /* # of searches that missed in auxmap_L1 and therefore had to
336 be handed to auxmap_L2. And the number of nodes inserted. */
337 static ULong n_auxmap_L2_searches
= 0;
338 static ULong n_auxmap_L2_nodes
= 0;
340 static Int n_sanity_cheap
= 0;
341 static Int n_sanity_expensive
= 0;
343 static Int n_secVBit_nodes
= 0;
344 static Int max_secVBit_nodes
= 0;
/* Maintain the per-kind secondary-map statistics when the SecMap
   pointer for some chunk changes from oldSM to newSM: decrement the
   counter matching oldSM's kind, increment the one matching newSM's
   kind, then refresh the high-water marks.
   NOTE(review): the bodies of the two 'else' arms are truncated in
   this extract (their closing braces and any further bookkeeping,
   presumably n_issued_SMs, are missing) -- confirm against the full
   file. */
346 static void update_SM_counts(SecMap
* oldSM
, SecMap
* newSM
)
/* Account for the map being replaced. */
348 if (oldSM
== &sm_distinguished
[SM_DIST_NOACCESS
]) n_noaccess_SMs
--;
349 else if (oldSM
== &sm_distinguished
[SM_DIST_UNDEFINED
]) n_undefined_SMs
--;
350 else if (oldSM
== &sm_distinguished
[SM_DIST_DEFINED
]) n_defined_SMs
--;
351 else { n_non_DSM_SMs
--;
/* Account for the map replacing it. */
354 if (newSM
== &sm_distinguished
[SM_DIST_NOACCESS
]) n_noaccess_SMs
++;
355 else if (newSM
== &sm_distinguished
[SM_DIST_UNDEFINED
]) n_undefined_SMs
++;
356 else if (newSM
== &sm_distinguished
[SM_DIST_DEFINED
]) n_defined_SMs
++;
357 else { n_non_DSM_SMs
++;
/* Track the maximum each counter has reached. */
360 if (n_noaccess_SMs
> max_noaccess_SMs
) max_noaccess_SMs
= n_noaccess_SMs
;
361 if (n_undefined_SMs
> max_undefined_SMs
) max_undefined_SMs
= n_undefined_SMs
;
362 if (n_defined_SMs
> max_defined_SMs
) max_defined_SMs
= n_defined_SMs
;
363 if (n_non_DSM_SMs
> max_non_DSM_SMs
) max_non_DSM_SMs
= n_non_DSM_SMs
;
366 /* --------------- Primary maps --------------- */
368 /* The main primary map. This covers some initial part of the address
369 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
370 handled using the auxiliary primary map.
372 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
373 && (defined(VGP_arm_linux) \
374 || defined(VGP_x86_linux) || defined(VGP_x86_solaris))
375 /* mc_main_asm.c needs visibility on a few things declared in this file.
376 MC_MAIN_STATIC allows to define them static if ok, i.e. on
377 platforms that are not using hand-coded asm statements. */
378 #define MC_MAIN_STATIC
380 #define MC_MAIN_STATIC static
382 MC_MAIN_STATIC SecMap
* primary_map
[N_PRIMARY_MAP
];
385 /* An entry in the auxiliary primary map. base must be a 64k-aligned
386 value, and sm points at the relevant secondary map. As with the
387 main primary map, the secondary may be either a real secondary, or
388 one of the three distinguished secondaries. DO NOT CHANGE THIS
389 LAYOUT: the first word has to be the key for OSet fast lookups.
398 /* Tunable parameter: How big is the L1 queue? */
399 #define N_AUXMAP_L1 24
401 /* Tunable parameter: How far along the L1 queue to insert
402 entries resulting from L2 lookups? */
403 #define AUXMAP_L1_INSERT_IX 12
407 AuxMapEnt
* ent
; // pointer to the matching auxmap_L2 node
409 auxmap_L1
[N_AUXMAP_L1
];
411 static OSet
* auxmap_L2
= NULL
;
/* Initialise both levels of the auxiliary primary map: zero out the
   L1 front-cache entries and create the (initially empty) L2 OSet,
   keyed on AuxMapEnt.base.
   NOTE(review): the declaration of the loop index (presumably
   'Word i;') is missing from this extract. */
413 static void init_auxmap_L1_L2 ( void )
/* Empty every L1 cache slot. */
416 for (i
= 0; i
< N_AUXMAP_L1
; i
++) {
417 auxmap_L1
[i
].base
= 0;
418 auxmap_L1
[i
].ent
= NULL
;
/* .base must be first so the OSet's fast word-compare works on it. */
421 tl_assert(0 == offsetof(AuxMapEnt
,base
));
422 tl_assert(sizeof(Addr
) == sizeof(void*));
423 auxmap_L2
= VG_(OSetGen_Create
)( /*keyOff*/ offsetof(AuxMapEnt
,base
),
425 VG_(malloc
), "mc.iaLL.1", VG_(free
) );
428 /* Check representation invariants; if OK return NULL; else a
429 descriptive bit of text. Also return the number of
430 non-distinguished secondary maps referred to from the auxiliary
/* Check the representation invariants of the two-level auxiliary
   map.  Returns NULL if everything is consistent, else a short
   description of the first violation found.  Also counts, via
   *n_secmaps_found, the non-distinguished secondaries reachable from
   the L2 table.
   NOTE(review): this extract has dropped lines (loop-index and 'key'
   declarations, the elems_seen increment, the NULL-result check
   after the OSet lookup, and various brace-only lines) -- verify
   against the full file before editing. */
433 static const HChar
* check_auxmap_L1_L2_sanity ( Word
* n_secmaps_found
)
436 /* On a 32-bit platform, the L2 and L1 tables should
437 both remain empty forever.
439 On a 64-bit platform:
441 all .base & 0xFFFF == 0
442 all .base > MAX_PRIMARY_ADDRESS
444 all .base & 0xFFFF == 0
445 all (.base > MAX_PRIMARY_ADDRESS
447 and .ent points to an AuxMapEnt with the same .base)
449 (.base == 0 and .ent == NULL)
451 *n_secmaps_found
= 0;
452 if (sizeof(void*) == 4) {
453 /* 32-bit platform */
454 if (VG_(OSetGen_Size
)(auxmap_L2
) != 0)
455 return "32-bit: auxmap_L2 is non-empty";
456 for (i
= 0; i
< N_AUXMAP_L1
; i
++)
457 if (auxmap_L1
[i
].base
!= 0 || auxmap_L1
[i
].ent
!= NULL
)
458 return "32-bit: auxmap_L1 is non-empty";
460 /* 64-bit platform */
461 UWord elems_seen
= 0;
462 AuxMapEnt
*elem
, *res
;
465 VG_(OSetGen_ResetIter
)(auxmap_L2
);
466 while ( (elem
= VG_(OSetGen_Next
)(auxmap_L2
)) ) {
468 if (0 != (elem
->base
& (Addr
)0xFFFF))
469 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
470 if (elem
->base
<= MAX_PRIMARY_ADDRESS
)
471 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
472 if (elem
->sm
== NULL
)
473 return "64-bit: .sm in _L2 is NULL";
474 if (!is_distinguished_sm(elem
->sm
))
475 (*n_secmaps_found
)++;
477 if (elems_seen
!= n_auxmap_L2_nodes
)
478 return "64-bit: disagreement on number of elems in _L2";
479 /* Check L1-L2 correspondence */
480 for (i
= 0; i
< N_AUXMAP_L1
; i
++) {
481 if (auxmap_L1
[i
].base
== 0 && auxmap_L1
[i
].ent
== NULL
)
483 if (0 != (auxmap_L1
[i
].base
& (Addr
)0xFFFF))
484 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
485 if (auxmap_L1
[i
].base
<= MAX_PRIMARY_ADDRESS
)
486 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
487 if (auxmap_L1
[i
].ent
== NULL
)
488 return "64-bit: .ent is NULL in auxmap_L1";
489 if (auxmap_L1
[i
].ent
->base
!= auxmap_L1
[i
].base
)
490 return "64-bit: _L1 and _L2 bases are inconsistent";
491 /* Look it up in auxmap_L2. */
492 key
.base
= auxmap_L1
[i
].base
;
494 res
= VG_(OSetGen_Lookup
)(auxmap_L2
, &key
);
496 return "64-bit: _L1 .base not found in _L2";
497 if (res
!= auxmap_L1
[i
].ent
)
498 return "64-bit: _L1 .ent disagrees with _L2 entry";
500 /* Check L1 contains no duplicates */
501 for (i
= 0; i
< N_AUXMAP_L1
; i
++) {
502 if (auxmap_L1
[i
].base
== 0)
504 for (j
= i
+1; j
< N_AUXMAP_L1
; j
++) {
505 if (auxmap_L1
[j
].base
== 0)
507 if (auxmap_L1
[j
].base
== auxmap_L1
[i
].base
)
508 return "64-bit: duplicate _L1 .base entries";
512 return NULL
; /* ok */
515 static void insert_into_auxmap_L1_at ( Word rank
, AuxMapEnt
* ent
)
519 tl_assert(rank
>= 0 && rank
< N_AUXMAP_L1
);
520 for (i
= N_AUXMAP_L1
-1; i
> rank
; i
--)
521 auxmap_L1
[i
] = auxmap_L1
[i
-1];
522 auxmap_L1
[rank
].base
= ent
->base
;
523 auxmap_L1
[rank
].ent
= ent
;
/* Look up the AuxMapEnt for high address 'a' (already 64k-aligned by
   the caller's masking, presumably -- confirm) without allocating.
   Search order: the two hottest L1 slots inline, then a linear scan
   of the rest of the self-organising L1 cache (promoting a hit one
   slot), and finally the L2 OSet (inserting a hit into L1 at
   AUXMAP_L1_INSERT_IX).
   NOTE(review): this extract has dropped lines -- the loop-index and
   'key'/'res' declarations, the i>0 guard before the promotion swap,
   the break out of the scan loop, and the final 'return res' -- so
   the control flow below is incomplete as shown. */
526 static INLINE AuxMapEnt
* maybe_find_in_auxmap ( Addr a
)
532 tl_assert(a
> MAX_PRIMARY_ADDRESS
);
535 /* First search the front-cache, which is a self-organising
536 list containing the most popular entries. */
/* Slot 0: direct hit. */
538 if (LIKELY(auxmap_L1
[0].base
== a
))
539 return auxmap_L1
[0].ent
;
/* Slot 1: hit; swap slots 0 and 1 so it becomes the hottest. */
540 if (LIKELY(auxmap_L1
[1].base
== a
)) {
541 Addr t_base
= auxmap_L1
[0].base
;
542 AuxMapEnt
* t_ent
= auxmap_L1
[0].ent
;
543 auxmap_L1
[0].base
= auxmap_L1
[1].base
;
544 auxmap_L1
[0].ent
= auxmap_L1
[1].ent
;
545 auxmap_L1
[1].base
= t_base
;
546 auxmap_L1
[1].ent
= t_ent
;
547 return auxmap_L1
[0].ent
;
550 n_auxmap_L1_searches
++;
/* Linear scan of the remaining L1 slots. */
552 for (i
= 0; i
< N_AUXMAP_L1
; i
++) {
553 if (auxmap_L1
[i
].base
== a
) {
557 tl_assert(i
>= 0 && i
<= N_AUXMAP_L1
);
559 n_auxmap_L1_cmps
+= (ULong
)(i
+1);
/* On a hit below the top, promote the entry one slot. */
561 if (i
< N_AUXMAP_L1
) {
563 Addr t_base
= auxmap_L1
[i
-1].base
;
564 AuxMapEnt
* t_ent
= auxmap_L1
[i
-1].ent
;
565 auxmap_L1
[i
-1].base
= auxmap_L1
[i
-0].base
;
566 auxmap_L1
[i
-1].ent
= auxmap_L1
[i
-0].ent
;
567 auxmap_L1
[i
-0].base
= t_base
;
568 auxmap_L1
[i
-0].ent
= t_ent
;
571 return auxmap_L1
[i
].ent
;
/* L1 miss: fall back to the L2 OSet. */
574 n_auxmap_L2_searches
++;
576 /* First see if we already have it. */
580 res
= VG_(OSetGen_Lookup
)(auxmap_L2
, &key
);
/* L2 hit: cache it part-way down L1. */
582 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX
, res
);
/* Get the AuxMapEnt for high address 'a', creating a new one (with
   its secondary set to the distinguished no-access map) if none
   exists yet.
   NOTE(review): dropped lines in this extract -- the early return on
   a maybe_find_in_auxmap hit, the masking/assignment of nyu->base,
   and the final 'return nyu' -- confirm against the full file. */
586 static AuxMapEnt
* find_or_alloc_in_auxmap ( Addr a
)
588 AuxMapEnt
*nyu
, *res
;
590 /* First see if we already have it. */
591 res
= maybe_find_in_auxmap( a
);
595 /* Ok, there's no entry in the secondary map, so we'll have
/* Allocate a fresh node inside the L2 OSet's pool. */
599 nyu
= (AuxMapEnt
*) VG_(OSetGen_AllocNode
)( auxmap_L2
, sizeof(AuxMapEnt
) );
/* New chunks start out entirely inaccessible. */
601 nyu
->sm
= &sm_distinguished
[SM_DIST_NOACCESS
];
602 VG_(OSetGen_Insert
)( auxmap_L2
, nyu
);
603 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX
, nyu
);
608 /* --------------- SecMap fundamentals --------------- */
610 // In all these, 'low' means it's definitely in the main primary map,
611 // 'high' means it's definitely in the auxiliary table.
613 static INLINE UWord
get_primary_map_low_offset ( Addr a
)
615 UWord pm_off
= a
>> 16;
619 static INLINE SecMap
** get_secmap_low_ptr ( Addr a
)
621 UWord pm_off
= a
>> 16;
622 # if VG_DEBUG_MEMORY >= 1
623 tl_assert(pm_off
< N_PRIMARY_MAP
);
625 return &primary_map
[ pm_off
];
/* Location of the SecMap* for high address 'a', going via the
   auxiliary map and allocating an entry there if needed.
   NOTE(review): the final line returning the address of the entry's
   secondary-map field is missing from this extract. */
628 static INLINE SecMap
** get_secmap_high_ptr ( Addr a
)
630 AuxMapEnt
* am
= find_or_alloc_in_auxmap(a
);
634 static INLINE SecMap
** get_secmap_ptr ( Addr a
)
636 return ( a
<= MAX_PRIMARY_ADDRESS
637 ? get_secmap_low_ptr(a
)
638 : get_secmap_high_ptr(a
));
641 static INLINE SecMap
* get_secmap_for_reading_low ( Addr a
)
643 return *get_secmap_low_ptr(a
);
646 static INLINE SecMap
* get_secmap_for_reading_high ( Addr a
)
648 return *get_secmap_high_ptr(a
);
/* Writable SecMap for low address 'a': if the primary-map slot still
   points at a distinguished (shared, read-only) secondary, replace
   it with a private copy first (copy-on-write).
   NOTE(review): the final line returning *p is missing from this
   extract. */
651 static INLINE SecMap
* get_secmap_for_writing_low(Addr a
)
653 SecMap
** p
= get_secmap_low_ptr(a
);
654 if (UNLIKELY(is_distinguished_sm(*p
)))
655 *p
= copy_for_writing(*p
);
/* Writable SecMap for high address 'a': same copy-on-write scheme as
   the low-address variant, but via the auxiliary map.
   NOTE(review): the final line returning *p is missing from this
   extract. */
659 static INLINE SecMap
* get_secmap_for_writing_high ( Addr a
)
661 SecMap
** p
= get_secmap_high_ptr(a
);
662 if (UNLIKELY(is_distinguished_sm(*p
)))
663 *p
= copy_for_writing(*p
);
667 /* Produce the secmap for 'a', either from the primary map or by
668 ensuring there is an entry for it in the aux primary map. The
669 secmap may be a distinguished one as the caller will only want to
672 static INLINE SecMap
* get_secmap_for_reading ( Addr a
)
674 return ( a
<= MAX_PRIMARY_ADDRESS
675 ? get_secmap_for_reading_low (a
)
676 : get_secmap_for_reading_high(a
) );
679 /* Produce the secmap for 'a', either from the primary map or by
680 ensuring there is an entry for it in the aux primary map. The
681 secmap may not be a distinguished one, since the caller will want
682 to be able to write it. If it is a distinguished secondary, make a
683 writable copy of it, install it, and return the copy instead. (COW
686 static INLINE SecMap
* get_secmap_for_writing ( Addr a
)
688 return ( a
<= MAX_PRIMARY_ADDRESS
689 ? get_secmap_for_writing_low (a
)
690 : get_secmap_for_writing_high(a
) );
693 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
694 allocate one if one doesn't already exist. This is used by the
697 static SecMap
* maybe_get_secmap_for ( Addr a
)
699 if (a
<= MAX_PRIMARY_ADDRESS
) {
700 return get_secmap_for_reading_low(a
);
702 AuxMapEnt
* am
= maybe_find_in_auxmap(a
);
703 return am
? am
->sm
: NULL
;
707 /* --------------- Fundamental functions --------------- */
710 void insert_vabits2_into_vabits8 ( Addr a
, UChar vabits2
, UChar
* vabits8
)
712 UInt shift
= (a
& 3) << 1; // shift by 0, 2, 4, or 6
713 *vabits8
&= ~(0x3 << shift
); // mask out the two old bits
714 *vabits8
|= (vabits2
<< shift
); // mask in the two new bits
718 void insert_vabits4_into_vabits8 ( Addr a
, UChar vabits4
, UChar
* vabits8
)
721 tl_assert(VG_IS_2_ALIGNED(a
)); // Must be 2-aligned
722 shift
= (a
& 2) << 1; // shift by 0 or 4
723 *vabits8
&= ~(0xf << shift
); // mask out the four old bits
724 *vabits8
|= (vabits4
<< shift
); // mask in the four new bits
728 UChar
extract_vabits2_from_vabits8 ( Addr a
, UChar vabits8
)
730 UInt shift
= (a
& 3) << 1; // shift by 0, 2, 4, or 6
731 vabits8
>>= shift
; // shift the two bits to the bottom
732 return 0x3 & vabits8
; // mask out the rest
736 UChar
extract_vabits4_from_vabits8 ( Addr a
, UChar vabits8
)
739 tl_assert(VG_IS_2_ALIGNED(a
)); // Must be 2-aligned
740 shift
= (a
& 2) << 1; // shift by 0 or 4
741 vabits8
>>= shift
; // shift the four bits to the bottom
742 return 0xf & vabits8
; // mask out the rest
745 // Note that these four are only used in slow cases. The fast cases do
746 // clever things like combine the auxmap check (in
747 // get_secmap_{read,writ}able) with alignment checks.
750 // Any time this function is called, if it is possible that vabits2
751 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
752 // sec-V-bits table must also be set!
754 void set_vabits2 ( Addr a
, UChar vabits2
)
756 SecMap
* sm
= get_secmap_for_writing(a
);
757 UWord sm_off
= SM_OFF(a
);
758 insert_vabits2_into_vabits8( a
, vabits2
, &(sm
->vabits8
[sm_off
]) );
762 UChar
get_vabits2 ( Addr a
)
764 SecMap
* sm
= get_secmap_for_reading(a
);
765 UWord sm_off
= SM_OFF(a
);
766 UChar vabits8
= sm
->vabits8
[sm_off
];
767 return extract_vabits2_from_vabits8(a
, vabits8
);
771 // Any time this function is called, if it is possible that any of the
772 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
773 // corresponding entry(s) in the sec-V-bits table must also be set!
/* Read the whole vabits8 chunk covering the 32-bit word at 'a'.
   NOTE(review): the final 'return vabits8;' line (and, presumably,
   an alignment assertion) is missing from this extract -- confirm
   against the full file. */
775 UChar
get_vabits8_for_aligned_word32 ( Addr a
)
777 SecMap
* sm
= get_secmap_for_reading(a
);
778 UWord sm_off
= SM_OFF(a
);
779 UChar vabits8
= sm
->vabits8
[sm_off
];
784 void set_vabits8_for_aligned_word32 ( Addr a
, UChar vabits8
)
786 SecMap
* sm
= get_secmap_for_writing(a
);
787 UWord sm_off
= SM_OFF(a
);
788 sm
->vabits8
[sm_off
] = vabits8
;
792 // Forward declarations
793 static UWord
get_sec_vbits8(Addr a
);
794 static void set_sec_vbits8(Addr a
, UWord vbits8
);
796 // Returns False if there was an addressability error.
/* Write the 8 expanded (in-register) V bits for byte 'a' into shadow
   memory, compressing them to the 2-bit in-memory form, and spilling
   to the sec-V-bits table in the partially-defined case.  Returns
   False on an addressability error, per the comment above.
   NOTE(review): the 'ok' bookkeeping and the function's return
   statements are missing from this extract. */
798 Bool
set_vbits8 ( Addr a
, UChar vbits8
)
801 UChar vabits2
= get_vabits2(a
);
802 if ( VA_BITS2_NOACCESS
!= vabits2
) {
803 // Addressable. Convert in-register format to in-memory format.
804 // Also remove any existing sec V bit entry for the byte if no
806 if ( V_BITS8_DEFINED
== vbits8
) { vabits2
= VA_BITS2_DEFINED
; }
807 else if ( V_BITS8_UNDEFINED
== vbits8
) { vabits2
= VA_BITS2_UNDEFINED
; }
/* Mixed bits: record the exact pattern in the sec-V-bits table. */
808 else { vabits2
= VA_BITS2_PARTDEFINED
;
809 set_sec_vbits8(a
, vbits8
); }
810 set_vabits2(a
, vabits2
);
813 // Unaddressable! Do nothing -- when writing to unaddressable
814 // memory it acts as a black hole, and the V bits can never be seen
815 // again. So we don't have to write them at all.
821 // Returns False if there was an addressability error. In that case, we put
822 // all defined bits into vbits8.
/* Expand the compressed V+A state for byte 'a' into 8 in-register V
   bits, written to *vbits8.  On a no-access byte the output is
   forced to fully-defined and, per the comment above, the function
   reports the addressability error via its Bool result.
   NOTE(review): the 'ok' bookkeeping and the return statements are
   missing from this extract. */
824 Bool
get_vbits8 ( Addr a
, UChar
* vbits8
)
827 UChar vabits2
= get_vabits2(a
);
829 // Convert the in-memory format to in-register format.
830 if ( VA_BITS2_DEFINED
== vabits2
) { *vbits8
= V_BITS8_DEFINED
; }
831 else if ( VA_BITS2_UNDEFINED
== vabits2
) { *vbits8
= V_BITS8_UNDEFINED
; }
832 else if ( VA_BITS2_NOACCESS
== vabits2
) {
833 *vbits8
= V_BITS8_DEFINED
; // Make V bits defined!
/* Only the partially-defined case remains: fetch the exact
   pattern from the sec-V-bits table. */
836 tl_assert( VA_BITS2_PARTDEFINED
== vabits2
);
837 *vbits8
= get_sec_vbits8(a
);
843 /* --------------- Secondary V bit table ------------ */
845 // This table holds the full V bit pattern for partially-defined bytes
846 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
849 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
850 // then overwrite the same address with a fully defined byte, the sec-V-bit
851 // node will not necessarily be removed. This is because checking for
852 // whether removal is necessary would slow down the fast paths.
854 // To avoid the stale nodes building up too much, we periodically (once the
855 // table reaches a certain size) garbage collect (GC) the table by
856 // traversing it and evicting any nodes not having PDB.
857 // If more than a certain proportion of nodes survived, we increase the
858 // table size so that GCs occur less often.
860 // This policy is designed to avoid bad table bloat in the worst case where
861 // a program creates huge numbers of stale PDBs -- we would get this bloat
862 // if we had no GC -- while handling well the case where a node becomes
863 // stale but shortly afterwards is rewritten with a PDB and so becomes
864 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
865 // remove all stale nodes as soon as possible, we just end up re-adding a
866 // lot of them in later again. The "sufficiently stale" approach avoids
867 // this. (If a program has many live PDBs, performance will just suck,
868 // there's no way around that.)
870 // Further comments, JRS 14 Feb 2012. It turns out that the policy of
871 // holding on to stale entries for 2 GCs before discarding them can lead
872 // to massive space leaks. So we're changing to an arrangement where
873 // lines are evicted as soon as they are observed to be stale during a
874 // GC. This also has a side benefit of allowing the sufficiently_stale
875 // field to be removed from the SecVBitNode struct, reducing its size by
876 // 8 bytes, which is a substantial space saving considering that the
877 // struct was previously 32 or so bytes, on a 64 bit target.
879 // In order to try and mitigate the problem that the "sufficiently stale"
880 // heuristic was designed to avoid, the table size is allowed to drift
881 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This
882 // means that nodes will exist in the table longer on average, and hopefully
883 // will be deleted and re-added less frequently.
885 // The previous scaling up mechanism (now called STEPUP) is retained:
886 // if residency exceeds 50%, the table is scaled up, although by a
887 // factor sqrt(2) rather than 2 as before. This effectively doubles the
888 // frequency of GCs when there are many PDBs and reduces the tendency of
889 // stale PDBs to reside for long periods in the table.
891 static OSet
* secVBitTable
;
894 static ULong sec_vbits_new_nodes
= 0;
895 static ULong sec_vbits_updates
= 0;
897 // This must be a power of two; this is checked in mc_pre_clo_init().
898 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
899 // a larger address range) they take more space but we can get multiple
900 // partially-defined bytes in one if they are close to each other, reducing
901 // the number of total nodes. In practice sometimes they are clustered (eg.
902 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
903 // row), but often not. So we choose something intermediate.
904 #define BYTES_PER_SEC_VBIT_NODE 16
906 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
907 // more than this many nodes survive a GC.
908 #define STEPUP_SURVIVOR_PROPORTION 0.5
909 #define STEPUP_GROWTH_FACTOR 1.414213562
911 // If the above heuristic doesn't apply, then we may make the table
912 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
913 // this many nodes survive a GC, _and_ the total table size does
914 // not exceed a fixed limit. The numbers are somewhat arbitrary, but
915 // work tolerably well on long Firefox runs. The scaleup ratio of 1.5%
916 // effectively although gradually reduces residency and increases time
917 // between GCs for programs with small numbers of PDBs. The 80000 limit
918 // effectively limits the table size to around 2MB for programs with
919 // small numbers of PDBs, whilst giving a reasonably long lifetime to
920 // entries, to try and reduce the costs resulting from deleting and
921 // re-adding of entries.
922 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
923 #define DRIFTUP_GROWTH_FACTOR 1.015
924 #define DRIFTUP_MAX_SIZE 80000
926 // We GC the table when it gets this many nodes in it, ie. it's effectively
927 // the table size. It can change.
928 static Int secVBitLimit
= 1000;
930 // The number of GCs done, used to age sec-V-bit nodes for eviction.
931 // Because it's unsigned, wrapping doesn't matter -- the right answer will
933 static UInt GCs_done
= 0;
938 UChar vbits8
[BYTES_PER_SEC_VBIT_NODE
];
942 static OSet
* createSecVBitTable(void)
944 OSet
* newSecVBitTable
;
945 newSecVBitTable
= VG_(OSetGen_Create_With_Pool
)
946 ( offsetof(SecVBitNode
, a
),
947 NULL
, // use fast comparisons
948 VG_(malloc
), "mc.cSVT.1 (sec VBit table)",
951 sizeof(SecVBitNode
));
952 return newSecVBitTable
;
955 static void gcSecVBitTable(void)
959 Int i
, n_nodes
= 0, n_survivors
= 0;
963 // Create the new table.
964 secVBitTable2
= createSecVBitTable();
966 // Traverse the table, moving fresh nodes into the new table.
967 VG_(OSetGen_ResetIter
)(secVBitTable
);
968 while ( (n
= VG_(OSetGen_Next
)(secVBitTable
)) ) {
969 // Keep node if any of its bytes are non-stale. Using
970 // get_vabits2() for the lookup is not very efficient, but I don't
972 for (i
= 0; i
< BYTES_PER_SEC_VBIT_NODE
; i
++) {
973 if (VA_BITS2_PARTDEFINED
== get_vabits2(n
->a
+ i
)) {
974 // Found a non-stale byte, so keep =>
975 // Insert a copy of the node into the new table.
977 VG_(OSetGen_AllocNode
)(secVBitTable2
, sizeof(SecVBitNode
));
979 VG_(OSetGen_Insert
)(secVBitTable2
, n2
);
985 // Get the before and after sizes.
986 n_nodes
= VG_(OSetGen_Size
)(secVBitTable
);
987 n_survivors
= VG_(OSetGen_Size
)(secVBitTable2
);
989 // Destroy the old table, and put the new one in its place.
990 VG_(OSetGen_Destroy
)(secVBitTable
);
991 secVBitTable
= secVBitTable2
;
993 if (VG_(clo_verbosity
) > 1 && n_nodes
!= 0) {
994 VG_(message
)(Vg_DebugMsg
, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
995 n_nodes
, n_survivors
, n_survivors
* 100.0 / n_nodes
);
998 // Increase table size if necessary.
999 if ((Double
)n_survivors
1000 > ((Double
)secVBitLimit
* STEPUP_SURVIVOR_PROPORTION
)) {
1001 secVBitLimit
= (Int
)((Double
)secVBitLimit
* (Double
)STEPUP_GROWTH_FACTOR
);
1002 if (VG_(clo_verbosity
) > 1)
1003 VG_(message
)(Vg_DebugMsg
,
1004 "memcheck GC: %d new table size (stepup)\n",
1008 if (secVBitLimit
< DRIFTUP_MAX_SIZE
1009 && (Double
)n_survivors
1010 > ((Double
)secVBitLimit
* DRIFTUP_SURVIVOR_PROPORTION
)) {
1011 secVBitLimit
= (Int
)((Double
)secVBitLimit
* (Double
)DRIFTUP_GROWTH_FACTOR
);
1012 if (VG_(clo_verbosity
) > 1)
1013 VG_(message
)(Vg_DebugMsg
,
1014 "memcheck GC: %d new table size (driftup)\n",
1019 static UWord
get_sec_vbits8(Addr a
)
1021 Addr aAligned
= VG_ROUNDDN(a
, BYTES_PER_SEC_VBIT_NODE
);
1022 Int amod
= a
% BYTES_PER_SEC_VBIT_NODE
;
1023 SecVBitNode
* n
= VG_(OSetGen_Lookup
)(secVBitTable
, &aAligned
);
1025 tl_assert2(n
, "get_sec_vbits8: no node for address %p (%p)\n", aAligned
, a
);
1026 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1027 // make it to the secondary V bits table.
1028 vbits8
= n
->vbits8
[amod
];
1029 tl_assert(V_BITS8_DEFINED
!= vbits8
&& V_BITS8_UNDEFINED
!= vbits8
);
1033 static void set_sec_vbits8(Addr a
, UWord vbits8
)
1035 Addr aAligned
= VG_ROUNDDN(a
, BYTES_PER_SEC_VBIT_NODE
);
1036 Int i
, amod
= a
% BYTES_PER_SEC_VBIT_NODE
;
1037 SecVBitNode
* n
= VG_(OSetGen_Lookup
)(secVBitTable
, &aAligned
);
1038 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1039 // make it to the secondary V bits table.
1040 tl_assert(V_BITS8_DEFINED
!= vbits8
&& V_BITS8_UNDEFINED
!= vbits8
);
1042 n
->vbits8
[amod
] = vbits8
; // update
1043 sec_vbits_updates
++;
1045 // Do a table GC if necessary. Nb: do this before creating and
1046 // inserting the new node, to avoid erroneously GC'ing the new node.
1047 if (secVBitLimit
== VG_(OSetGen_Size
)(secVBitTable
)) {
1051 // New node: assign the specific byte, make the rest invalid (they
1052 // should never be read as-is, but be cautious).
1053 n
= VG_(OSetGen_AllocNode
)(secVBitTable
, sizeof(SecVBitNode
));
1055 for (i
= 0; i
< BYTES_PER_SEC_VBIT_NODE
; i
++) {
1056 n
->vbits8
[i
] = V_BITS8_UNDEFINED
;
1058 n
->vbits8
[amod
] = vbits8
;
1060 // Insert the new node.
1061 VG_(OSetGen_Insert
)(secVBitTable
, n
);
1062 sec_vbits_new_nodes
++;
1064 n_secVBit_nodes
= VG_(OSetGen_Size
)(secVBitTable
);
1065 if (n_secVBit_nodes
> max_secVBit_nodes
)
1066 max_secVBit_nodes
= n_secVBit_nodes
;
1070 /* --------------- Endianness helpers --------------- */
1072 /* Returns the offset in memory of the byteno-th most significant byte
1073 in a wordszB-sized word, given the specified endianness. */
1074 static INLINE UWord
byte_offset_w ( UWord wordszB
, Bool bigendian
,
1076 return bigendian
? (wordszB
-1-byteno
) : byteno
;
1080 /* --------------- Ignored address ranges --------------- */
1082 /* Denotes the address-error-reportability status for address ranges:
1083 IAR_NotIgnored: the usual case -- report errors in this range
1084 IAR_CommandLine: don't report errors -- from command line setting
1085 IAR_ClientReq: don't report errors -- from client request
1088 enum { IAR_INVALID
=99,
1094 static const HChar
* showIARKind ( IARKind iark
)
1097 case IAR_INVALID
: return "INVALID";
1098 case IAR_NotIgnored
: return "NotIgnored";
1099 case IAR_CommandLine
: return "CommandLine";
1100 case IAR_ClientReq
: return "ClientReq";
1101 default: return "???";
1105 // RangeMap<IARKind>
1106 static RangeMap
* gIgnoredAddressRanges
= NULL
;
1108 static void init_gIgnoredAddressRanges ( void )
1110 if (LIKELY(gIgnoredAddressRanges
!= NULL
))
1112 gIgnoredAddressRanges
= VG_(newRangeMap
)( VG_(malloc
), "mc.igIAR.1",
1113 VG_(free
), IAR_NotIgnored
);
1116 Bool
MC_(in_ignored_range
) ( Addr a
)
1118 if (LIKELY(gIgnoredAddressRanges
== NULL
))
1120 UWord how
= IAR_INVALID
;
1121 UWord key_min
= ~(UWord
)0;
1122 UWord key_max
= (UWord
)0;
1123 VG_(lookupRangeMap
)(&key_min
, &key_max
, &how
, gIgnoredAddressRanges
, a
);
1124 tl_assert(key_min
<= a
&& a
<= key_max
);
1126 case IAR_NotIgnored
: return False
;
1127 case IAR_CommandLine
: return True
;
1128 case IAR_ClientReq
: return True
;
1129 default: break; /* invalid */
1131 VG_(tool_panic
)("MC_(in_ignore_range)");
1135 Bool
MC_(in_ignored_range_below_sp
) ( Addr sp
, Addr a
, UInt szB
)
1137 if (LIKELY(!MC_(clo_ignore_range_below_sp
)))
1139 tl_assert(szB
>= 1 && szB
<= 32);
1140 tl_assert(MC_(clo_ignore_range_below_sp__first_offset
)
1141 > MC_(clo_ignore_range_below_sp__last_offset
));
1142 Addr range_lo
= sp
- MC_(clo_ignore_range_below_sp__first_offset
);
1143 Addr range_hi
= sp
- MC_(clo_ignore_range_below_sp__last_offset
);
1144 if (range_lo
>= range_hi
) {
1145 /* Bizarre. We have a wraparound situation. What should we do? */
1146 return False
; // Play safe
1148 /* This is the expected case. */
1149 if (range_lo
<= a
&& a
+ szB
- 1 <= range_hi
)
1158 /* Parse two Addrs (in hex) separated by a dash, or fail. */
1160 static Bool
parse_Addr_pair ( const HChar
** ppc
, Addr
* result1
, Addr
* result2
)
1162 Bool ok
= VG_(parse_Addr
) (ppc
, result1
);
1168 ok
= VG_(parse_Addr
) (ppc
, result2
);
1174 /* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
1177 static Bool
parse_UInt_pair ( const HChar
** ppc
, UInt
* result1
, UInt
* result2
)
1179 Bool ok
= VG_(parse_UInt
) (ppc
, result1
);
1185 ok
= VG_(parse_UInt
) (ppc
, result2
);
1191 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1192 fail. If they are valid, add them to the global set of ignored
1194 static Bool
parse_ignore_ranges ( const HChar
* str0
)
1196 init_gIgnoredAddressRanges();
1197 const HChar
* str
= str0
;
1198 const HChar
** ppc
= &str
;
1200 Addr start
= ~(Addr
)0;
1202 Bool ok
= parse_Addr_pair(ppc
, &start
, &end
);
1207 VG_(bindRangeMap
)( gIgnoredAddressRanges
, start
, end
, IAR_CommandLine
);
1218 /* Add or remove [start, +len) from the set of ignored ranges. */
1219 static Bool
modify_ignore_ranges ( Bool addRange
, Addr start
, Addr len
)
1221 init_gIgnoredAddressRanges();
1222 const Bool verbose
= (VG_(clo_verbosity
) > 1);
1227 VG_(bindRangeMap
)(gIgnoredAddressRanges
,
1228 start
, start
+len
-1, IAR_ClientReq
);
1230 VG_(dmsg
)("memcheck: modify_ignore_ranges: add %p %p\n",
1231 (void*)start
, (void*)(start
+len
-1));
1233 VG_(bindRangeMap
)(gIgnoredAddressRanges
,
1234 start
, start
+len
-1, IAR_NotIgnored
);
1236 VG_(dmsg
)("memcheck: modify_ignore_ranges: del %p %p\n",
1237 (void*)start
, (void*)(start
+len
-1));
1240 VG_(dmsg
)("memcheck: now have %u ranges:\n",
1241 VG_(sizeRangeMap
)(gIgnoredAddressRanges
));
1243 for (i
= 0; i
< VG_(sizeRangeMap
)(gIgnoredAddressRanges
); i
++) {
1244 UWord val
= IAR_INVALID
;
1245 UWord key_min
= ~(UWord
)0;
1246 UWord key_max
= (UWord
)0;
1247 VG_(indexRangeMap
)( &key_min
, &key_max
, &val
,
1248 gIgnoredAddressRanges
, i
);
1249 VG_(dmsg
)("memcheck: [%u] %016lx-%016lx %s\n",
1250 i
, key_min
, key_max
, showIARKind(val
));
1257 /* --------------- Load/store slow cases. --------------- */
1260 __attribute__((noinline
))
1261 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong
* res
,
1262 Addr a
, SizeT nBits
, Bool bigendian
)
1264 ULong pessim
[4]; /* only used when p-l-ok=yes */
1265 SSizeT szB
= nBits
/ 8;
1266 SSizeT szL
= szB
/ 8; /* Size in Longs (64-bit units) */
1267 SSizeT i
, j
; /* Must be signed. */
1268 SizeT n_addrs_bad
= 0;
1273 /* Code below assumes load size is a power of two and at least 64
1275 tl_assert((szB
& (szB
-1)) == 0 && szL
> 0);
1277 /* If this triggers, you probably just need to increase the size of
1278 the pessim array. */
1279 tl_assert(szL
<= sizeof(pessim
) / sizeof(pessim
[0]));
1281 for (j
= 0; j
< szL
; j
++) {
1282 pessim
[j
] = V_BITS64_DEFINED
;
1283 res
[j
] = V_BITS64_UNDEFINED
;
1286 /* Make up a result V word, which contains the loaded data for
1287 valid addresses and Defined for invalid addresses. Iterate over
1288 the bytes in the word, from the most significant down to the
1289 least. The vbits to return are calculated into vbits128. Also
1290 compute the pessimising value to be used when
1291 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1292 info can be gleaned from the pessim array) but is used as a
1294 for (j
= szL
-1; j
>= 0; j
--) {
1295 ULong vbits64
= V_BITS64_UNDEFINED
;
1296 ULong pessim64
= V_BITS64_DEFINED
;
1297 UWord long_index
= byte_offset_w(szL
, bigendian
, j
);
1298 for (i
= 8-1; i
>= 0; i
--) {
1299 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP
);
1300 ai
= a
+ 8*long_index
+ byte_offset_w(8, bigendian
, i
);
1301 ok
= get_vbits8(ai
, &vbits8
);
1304 if (!ok
) n_addrs_bad
++;
1306 pessim64
|= (ok
? V_BITS8_DEFINED
: V_BITS8_UNDEFINED
);
1308 res
[long_index
] = vbits64
;
1309 pessim
[long_index
] = pessim64
;
1312 /* In the common case, all the addresses involved are valid, so we
1313 just return the computed V bits and have done. */
1314 if (LIKELY(n_addrs_bad
== 0))
1317 /* If there's no possibility of getting a partial-loads-ok
1318 exemption, report the error and quit. */
1319 if (!MC_(clo_partial_loads_ok
)) {
1320 MC_(record_address_error
)( VG_(get_running_tid
)(), a
, szB
, False
);
1324 /* The partial-loads-ok excemption might apply. Find out if it
1325 does. If so, don't report an addressing error, but do return
1326 Undefined for the bytes that are out of range, so as to avoid
1327 false negatives. If it doesn't apply, just report an addressing
1328 error in the usual way. */
1330 /* Some code steps along byte strings in aligned chunks
1331 even when there is only a partially defined word at the end (eg,
1332 optimised strlen). This is allowed by the memory model of
1333 modern machines, since an aligned load cannot span two pages and
1334 thus cannot "partially fault".
1336 Therefore, a load from a partially-addressible place is allowed
1337 if all of the following hold:
1338 - the command-line flag is set [by default, it isn't]
1339 - it's an aligned load
1340 - at least one of the addresses in the word *is* valid
1342 Since this suppresses the addressing error, we avoid false
1343 negatives by marking bytes undefined when they come from an
1347 /* "at least one of the addresses is invalid" */
1349 for (j
= 0; j
< szL
; j
++)
1350 ok
|= pessim
[j
] != V_BITS64_DEFINED
;
1353 # if defined(VGP_s390x_linux)
1354 tl_assert(szB
== 16); // s390 doesn't have > 128 bit SIMD
1355 /* OK if all loaded bytes are from the same page. */
1356 Bool alignedOK
= ((a
& 0xfff) <= 0x1000 - szB
);
1357 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
1358 /* lxvd2x might generate an unaligned 128 bit vector load. */
1359 Bool alignedOK
= (szB
== 16);
1361 /* OK if the address is aligned by the load size. */
1362 Bool alignedOK
= (0 == (a
& (szB
- 1)));
1365 if (alignedOK
&& n_addrs_bad
< szB
) {
1366 /* Exemption applies. Use the previously computed pessimising
1367 value and return the combined result, but don't flag an
1368 addressing error. The pessimising value is Defined for valid
1369 addresses and Undefined for invalid addresses. */
1370 /* for assumption that doing bitwise or implements UifU */
1371 tl_assert(V_BIT_UNDEFINED
== 1 && V_BIT_DEFINED
== 0);
1372 /* (really need "UifU" here...)
1373 vbits[j] UifU= pessim[j] (is pessimised by it, iow) */
1374 for (j
= szL
-1; j
>= 0; j
--)
1375 res
[j
] |= pessim
[j
];
1379 /* Exemption doesn't apply. Flag an addressing error in the normal
1381 MC_(record_address_error
)( VG_(get_running_tid
)(), a
, szB
, False
);
1385 __attribute__((noinline
))
1386 __attribute__((used
))
1388 ULong
mc_LOADVn_slow ( Addr a
, SizeT nBits
, Bool bigendian
);
1391 __attribute__((noinline
))
1392 __attribute__((used
))
1393 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
1394 this function may get called from hand written assembly. */
1395 ULong
mc_LOADVn_slow ( Addr a
, SizeT nBits
, Bool bigendian
)
1397 PROF_EVENT(MCPE_LOADVN_SLOW
);
1399 /* ------------ BEGIN semi-fast cases ------------ */
1400 /* These deal quickly-ish with the common auxiliary primary map
1401 cases on 64-bit platforms. Are merely a speedup hack; can be
1402 omitted without loss of correctness/functionality. Note that in
1403 both cases the "sizeof(void*) == 8" causes these cases to be
1404 folded out by compilers on 32-bit platforms. These are derived
1405 from LOADV64 and LOADV32.
1408 # if defined(VGA_mips64) && defined(VGABI_N32)
1409 if (LIKELY(sizeof(void*) == 4 && nBits
== 64 && VG_IS_8_ALIGNED(a
)))
1411 if (LIKELY(sizeof(void*) == 8 && nBits
== 64 && VG_IS_8_ALIGNED(a
)))
1414 SecMap
* sm
= get_secmap_for_reading(a
);
1415 UWord sm_off16
= SM_OFF_16(a
);
1416 UWord vabits16
= sm
->vabits16
[sm_off16
];
1417 if (LIKELY(vabits16
== VA_BITS16_DEFINED
))
1418 return V_BITS64_DEFINED
;
1419 if (LIKELY(vabits16
== VA_BITS16_UNDEFINED
))
1420 return V_BITS64_UNDEFINED
;
1421 /* else fall into the slow case */
1424 # if defined(VGA_mips64) && defined(VGABI_N32)
1425 if (LIKELY(sizeof(void*) == 4 && nBits
== 32 && VG_IS_4_ALIGNED(a
)))
1427 if (LIKELY(sizeof(void*) == 8 && nBits
== 32 && VG_IS_4_ALIGNED(a
)))
1430 SecMap
* sm
= get_secmap_for_reading(a
);
1431 UWord sm_off
= SM_OFF(a
);
1432 UWord vabits8
= sm
->vabits8
[sm_off
];
1433 if (LIKELY(vabits8
== VA_BITS8_DEFINED
))
1434 return ((UWord
)0xFFFFFFFF00000000ULL
| (UWord
)V_BITS32_DEFINED
);
1435 if (LIKELY(vabits8
== VA_BITS8_UNDEFINED
))
1436 return ((UWord
)0xFFFFFFFF00000000ULL
| (UWord
)V_BITS32_UNDEFINED
);
1437 /* else fall into slow case */
1440 /* ------------ END semi-fast cases ------------ */
1442 ULong vbits64
= V_BITS64_UNDEFINED
; /* result */
1443 ULong pessim64
= V_BITS64_DEFINED
; /* only used when p-l-ok=yes */
1444 SSizeT szB
= nBits
/ 8;
1445 SSizeT i
; /* Must be signed. */
1446 SizeT n_addrs_bad
= 0;
1451 tl_assert(nBits
== 64 || nBits
== 32 || nBits
== 16 || nBits
== 8);
1453 /* Make up a 64-bit result V word, which contains the loaded data
1454 for valid addresses and Defined for invalid addresses. Iterate
1455 over the bytes in the word, from the most significant down to
1456 the least. The vbits to return are calculated into vbits64.
1457 Also compute the pessimising value to be used when
1458 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1459 info can be gleaned from pessim64) but is used as a
1461 for (i
= szB
-1; i
>= 0; i
--) {
1462 PROF_EVENT(MCPE_LOADVN_SLOW_LOOP
);
1463 ai
= a
+ byte_offset_w(szB
, bigendian
, i
);
1464 ok
= get_vbits8(ai
, &vbits8
);
1467 if (!ok
) n_addrs_bad
++;
1469 pessim64
|= (ok
? V_BITS8_DEFINED
: V_BITS8_UNDEFINED
);
1472 /* In the common case, all the addresses involved are valid, so we
1473 just return the computed V bits and have done. */
1474 if (LIKELY(n_addrs_bad
== 0))
1477 /* If there's no possibility of getting a partial-loads-ok
1478 exemption, report the error and quit. */
1479 if (!MC_(clo_partial_loads_ok
)) {
1480 MC_(record_address_error
)( VG_(get_running_tid
)(), a
, szB
, False
);
1484 /* The partial-loads-ok excemption might apply. Find out if it
1485 does. If so, don't report an addressing error, but do return
1486 Undefined for the bytes that are out of range, so as to avoid
1487 false negatives. If it doesn't apply, just report an addressing
1488 error in the usual way. */
1490 /* Some code steps along byte strings in aligned word-sized chunks
1491 even when there is only a partially defined word at the end (eg,
1492 optimised strlen). This is allowed by the memory model of
1493 modern machines, since an aligned load cannot span two pages and
1494 thus cannot "partially fault". Despite such behaviour being
1495 declared undefined by ANSI C/C++.
1497 Therefore, a load from a partially-addressible place is allowed
1498 if all of the following hold:
1499 - the command-line flag is set [by default, it isn't]
1500 - it's a word-sized, word-aligned load
1501 - at least one of the addresses in the word *is* valid
1503 Since this suppresses the addressing error, we avoid false
1504 negatives by marking bytes undefined when they come from an
1508 /* "at least one of the addresses is invalid" */
1509 tl_assert(pessim64
!= V_BITS64_DEFINED
);
1511 # if defined(VGA_mips64) && defined(VGABI_N32)
1512 if (szB
== VG_WORDSIZE
* 2 && VG_IS_WORD_ALIGNED(a
)
1513 && n_addrs_bad
< VG_WORDSIZE
* 2)
1514 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
1515 /* On power unaligned loads of words are OK. */
1516 if (szB
== VG_WORDSIZE
&& n_addrs_bad
< VG_WORDSIZE
)
1518 if (szB
== VG_WORDSIZE
&& VG_IS_WORD_ALIGNED(a
)
1519 && n_addrs_bad
< VG_WORDSIZE
)
1522 /* Exemption applies. Use the previously computed pessimising
1523 value for vbits64 and return the combined result, but don't
1524 flag an addressing error. The pessimising value is Defined
1525 for valid addresses and Undefined for invalid addresses. */
1526 /* for assumption that doing bitwise or implements UifU */
1527 tl_assert(V_BIT_UNDEFINED
== 1 && V_BIT_DEFINED
== 0);
1528 /* (really need "UifU" here...)
1529 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1530 vbits64
|= pessim64
;
1534 /* Also, in appears that gcc generates string-stepping code in
1535 32-bit chunks on 64 bit platforms. So, also grant an exception
1536 for this case. Note that the first clause of the conditional
1537 (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1538 will get folded out in 32 bit builds. */
1539 # if defined(VGA_mips64) && defined(VGABI_N32)
1540 if (VG_WORDSIZE
== 4
1541 && VG_IS_4_ALIGNED(a
) && nBits
== 32 && n_addrs_bad
< 4)
1543 if (VG_WORDSIZE
== 8
1544 && VG_IS_4_ALIGNED(a
) && nBits
== 32 && n_addrs_bad
< 4)
1547 tl_assert(V_BIT_UNDEFINED
== 1 && V_BIT_DEFINED
== 0);
1548 /* (really need "UifU" here...)
1549 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1550 vbits64
|= pessim64
;
1551 /* Mark the upper 32 bits as undefined, just to be on the safe
1553 vbits64
|= (((ULong
)V_BITS32_UNDEFINED
) << 32);
1557 /* Exemption doesn't apply. Flag an addressing error in the normal
1559 MC_(record_address_error
)( VG_(get_running_tid
)(), a
, szB
, False
);
1566 __attribute__((noinline
))
1567 void mc_STOREVn_slow ( Addr a
, SizeT nBits
, ULong vbytes
, Bool bigendian
)
1569 SizeT szB
= nBits
/ 8;
1570 SizeT i
, n_addrs_bad
= 0;
1575 PROF_EVENT(MCPE_STOREVN_SLOW
);
1577 /* ------------ BEGIN semi-fast cases ------------ */
1578 /* These deal quickly-ish with the common auxiliary primary map
1579 cases on 64-bit platforms. Are merely a speedup hack; can be
1580 omitted without loss of correctness/functionality. Note that in
1581 both cases the "sizeof(void*) == 8" causes these cases to be
1582 folded out by compilers on 32-bit platforms. The logic below
1583 is somewhat similar to some cases extensively commented in
1584 MC_(helperc_STOREV8).
1586 # if defined(VGA_mips64) && defined(VGABI_N32)
1587 if (LIKELY(sizeof(void*) == 4 && nBits
== 64 && VG_IS_8_ALIGNED(a
)))
1589 if (LIKELY(sizeof(void*) == 8 && nBits
== 64 && VG_IS_8_ALIGNED(a
)))
1592 SecMap
* sm
= get_secmap_for_reading(a
);
1593 UWord sm_off16
= SM_OFF_16(a
);
1594 UWord vabits16
= sm
->vabits16
[sm_off16
];
1595 if (LIKELY( !is_distinguished_sm(sm
) &&
1596 (VA_BITS16_DEFINED
== vabits16
||
1597 VA_BITS16_UNDEFINED
== vabits16
) )) {
1598 /* Handle common case quickly: a is suitably aligned, */
1599 /* is mapped, and is addressible. */
1600 // Convert full V-bits in register to compact 2-bit form.
1601 if (LIKELY(V_BITS64_DEFINED
== vbytes
)) {
1602 sm
->vabits16
[sm_off16
] = VA_BITS16_DEFINED
;
1604 } else if (V_BITS64_UNDEFINED
== vbytes
) {
1605 sm
->vabits16
[sm_off16
] = VA_BITS16_UNDEFINED
;
1608 /* else fall into the slow case */
1610 /* else fall into the slow case */
1613 # if defined(VGA_mips64) && defined(VGABI_N32)
1614 if (LIKELY(sizeof(void*) == 4 && nBits
== 32 && VG_IS_4_ALIGNED(a
)))
1616 if (LIKELY(sizeof(void*) == 8 && nBits
== 32 && VG_IS_4_ALIGNED(a
)))
1619 SecMap
* sm
= get_secmap_for_reading(a
);
1620 UWord sm_off
= SM_OFF(a
);
1621 UWord vabits8
= sm
->vabits8
[sm_off
];
1622 if (LIKELY( !is_distinguished_sm(sm
) &&
1623 (VA_BITS8_DEFINED
== vabits8
||
1624 VA_BITS8_UNDEFINED
== vabits8
) )) {
1625 /* Handle common case quickly: a is suitably aligned, */
1626 /* is mapped, and is addressible. */
1627 // Convert full V-bits in register to compact 2-bit form.
1628 if (LIKELY(V_BITS32_DEFINED
== (vbytes
& 0xFFFFFFFF))) {
1629 sm
->vabits8
[sm_off
] = VA_BITS8_DEFINED
;
1631 } else if (V_BITS32_UNDEFINED
== (vbytes
& 0xFFFFFFFF)) {
1632 sm
->vabits8
[sm_off
] = VA_BITS8_UNDEFINED
;
1635 /* else fall into the slow case */
1637 /* else fall into the slow case */
1639 /* ------------ END semi-fast cases ------------ */
1641 tl_assert(nBits
== 64 || nBits
== 32 || nBits
== 16 || nBits
== 8);
1643 /* Dump vbytes in memory, iterating from least to most significant
1644 byte. At the same time establish addressibility of the location. */
1645 for (i
= 0; i
< szB
; i
++) {
1646 PROF_EVENT(MCPE_STOREVN_SLOW_LOOP
);
1647 ai
= a
+ byte_offset_w(szB
, bigendian
, i
);
1648 vbits8
= vbytes
& 0xff;
1649 ok
= set_vbits8(ai
, vbits8
);
1650 if (!ok
) n_addrs_bad
++;
1654 /* If an address error has happened, report it. */
1655 if (n_addrs_bad
> 0)
1656 MC_(record_address_error
)( VG_(get_running_tid
)(), a
, szB
, True
);
1660 /*------------------------------------------------------------*/
1661 /*--- Setting permissions over address ranges. ---*/
1662 /*------------------------------------------------------------*/
1664 static void set_address_range_perms ( Addr a
, SizeT lenT
, UWord vabits16
,
1667 UWord sm_off
, sm_off16
;
1668 UWord vabits2
= vabits16
& 0x3;
1669 SizeT lenA
, lenB
, len_to_next_secmap
;
1673 SecMap
* example_dsm
;
1675 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS
);
1677 /* Check the V+A bits make sense. */
1678 tl_assert(VA_BITS16_NOACCESS
== vabits16
||
1679 VA_BITS16_UNDEFINED
== vabits16
||
1680 VA_BITS16_DEFINED
== vabits16
);
1682 // This code should never write PDBs; ensure this. (See comment above
1684 tl_assert(VA_BITS2_PARTDEFINED
!= vabits2
);
1689 if (lenT
> 256 * 1024 * 1024) {
1690 if (VG_(clo_verbosity
) > 0 && !VG_(clo_xml
)) {
1691 const HChar
* s
= "unknown???";
1692 if (vabits16
== VA_BITS16_NOACCESS
) s
= "noaccess";
1693 if (vabits16
== VA_BITS16_UNDEFINED
) s
= "undefined";
1694 if (vabits16
== VA_BITS16_DEFINED
) s
= "defined";
1695 VG_(message
)(Vg_UserMsg
, "Warning: set address range perms: "
1696 "large range [0x%lx, 0x%lx) (%s)\n",
1701 #ifndef PERF_FAST_SARP
1702 /*------------------ debug-only case ------------------ */
1704 // Endianness doesn't matter here because all bytes are being set to
1706 // Nb: We don't have to worry about updating the sec-V-bits table
1707 // after these set_vabits2() calls because this code never writes
1708 // VA_BITS2_PARTDEFINED values.
1710 for (i
= 0; i
< lenT
; i
++) {
1711 set_vabits2(a
+ i
, vabits2
);
1717 /*------------------ standard handling ------------------ */
1719 /* Get the distinguished secondary that we might want
1720 to use (part of the space-compression scheme). */
1721 example_dsm
= &sm_distinguished
[dsm_num
];
1723 // We have to handle ranges covering various combinations of partial and
1724 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1725 // Cases marked with a '*' are common.
1729 // * one partial sec-map (p) 1
1730 // - one whole sec-map (P) 2
1732 // * two partial sec-maps (pp) 1,3
1733 // - one partial, one whole sec-map (pP) 1,2
1734 // - one whole, one partial sec-map (Pp) 2,3
1735 // - two whole sec-maps (PP) 2,2
1737 // * one partial, one whole, one partial (pPp) 1,2,3
1738 // - one partial, two whole (pPP) 1,2,2
1739 // - two whole, one partial (PPp) 2,2,3
1740 // - three whole (PPP) 2,2,2
1742 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1743 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1744 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1745 // - N whole (PP...PP) 2,2...2,3
1747 // Break up total length (lenT) into two parts: length in the first
1748 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
1749 aNext
= start_of_this_sm(a
) + SM_SIZE
;
1750 len_to_next_secmap
= aNext
- a
;
1751 if ( lenT
<= len_to_next_secmap
) {
1752 // Range entirely within one sec-map. Covers almost all cases.
1753 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP
);
1756 } else if (is_start_of_sm(a
)) {
1757 // Range spans at least one whole sec-map, and starts at the beginning
1758 // of a sec-map; skip to Part 2.
1759 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP
);
1764 // Range spans two or more sec-maps, first one is partial.
1765 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS
);
1766 lenA
= len_to_next_secmap
;
1770 //------------------------------------------------------------------------
1771 // Part 1: Deal with the first sec_map. Most of the time the range will be
1772 // entirely within a sec_map and this part alone will suffice. Also,
1773 // doing it this way lets us avoid repeatedly testing for the crossing of
1774 // a sec-map boundary within these loops.
1775 //------------------------------------------------------------------------
1777 // If it's distinguished, make it undistinguished if necessary.
1778 sm_ptr
= get_secmap_ptr(a
);
1779 if (is_distinguished_sm(*sm_ptr
)) {
1780 if (*sm_ptr
== example_dsm
) {
1781 // Sec-map already has the V+A bits that we want, so skip.
1782 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK
);
1786 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1
);
1787 *sm_ptr
= copy_for_writing(*sm_ptr
);
1794 if (VG_IS_8_ALIGNED(a
)) break;
1795 if (lenA
< 1) break;
1796 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A
);
1798 insert_vabits2_into_vabits8( a
, vabits2
, &(sm
->vabits8
[sm_off
]) );
1802 // 8-aligned, 8 byte steps
1804 if (lenA
< 8) break;
1805 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A
);
1806 sm_off16
= SM_OFF_16(a
);
1807 sm
->vabits16
[sm_off16
] = vabits16
;
1813 if (lenA
< 1) break;
1814 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B
);
1816 insert_vabits2_into_vabits8( a
, vabits2
, &(sm
->vabits8
[sm_off
]) );
1821 // We've finished the first sec-map. Is that it?
1825 //------------------------------------------------------------------------
1826 // Part 2: Fast-set entire sec-maps at a time.
1827 //------------------------------------------------------------------------
1829 // 64KB-aligned, 64KB steps.
1830 // Nb: we can reach here with lenB < SM_SIZE
1831 tl_assert(0 == lenA
);
1833 if (lenB
< SM_SIZE
) break;
1834 tl_assert(is_start_of_sm(a
));
1835 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K
);
1836 sm_ptr
= get_secmap_ptr(a
);
1837 if (!is_distinguished_sm(*sm_ptr
)) {
1838 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM
);
1839 // Free the non-distinguished sec-map that we're replacing. This
1840 // case happens moderately often, enough to be worthwhile.
1841 SysRes sres
= VG_(am_munmap_valgrind
)((Addr
)*sm_ptr
, sizeof(SecMap
));
1842 tl_assert2(! sr_isError(sres
), "SecMap valgrind munmap failure\n");
1844 update_SM_counts(*sm_ptr
, example_dsm
);
1845 // Make the sec-map entry point to the example DSM
1846 *sm_ptr
= example_dsm
;
1851 // We've finished the whole sec-maps. Is that it?
1855 //------------------------------------------------------------------------
1856 // Part 3: Finish off the final partial sec-map, if necessary.
1857 //------------------------------------------------------------------------
1859 tl_assert(is_start_of_sm(a
) && lenB
< SM_SIZE
);
1861 // If it's distinguished, make it undistinguished if necessary.
1862 sm_ptr
= get_secmap_ptr(a
);
1863 if (is_distinguished_sm(*sm_ptr
)) {
1864 if (*sm_ptr
== example_dsm
) {
1865 // Sec-map already has the V+A bits that we want, so stop.
1866 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK
);
1869 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2
);
1870 *sm_ptr
= copy_for_writing(*sm_ptr
);
1875 // 8-aligned, 8 byte steps
1877 if (lenB
< 8) break;
1878 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B
);
1879 sm_off16
= SM_OFF_16(a
);
1880 sm
->vabits16
[sm_off16
] = vabits16
;
1886 if (lenB
< 1) return;
1887 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C
);
1889 insert_vabits2_into_vabits8( a
, vabits2
, &(sm
->vabits8
[sm_off
]) );
1896 /* --- Set permissions for arbitrary address ranges --- */
1898 void MC_(make_mem_noaccess
) ( Addr a
, SizeT len
)
1900 PROF_EVENT(MCPE_MAKE_MEM_NOACCESS
);
1901 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a
, len
);
1902 set_address_range_perms ( a
, len
, VA_BITS16_NOACCESS
, SM_DIST_NOACCESS
);
1903 if (UNLIKELY( MC_(clo_mc_level
) == 3 ))
1904 ocache_sarp_Clear_Origins ( a
, len
);
1907 static void make_mem_undefined ( Addr a
, SizeT len
)
1909 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED
);
1910 DEBUG("make_mem_undefined(%p, %lu)\n", a
, len
);
1911 set_address_range_perms ( a
, len
, VA_BITS16_UNDEFINED
, SM_DIST_UNDEFINED
);
1914 void MC_(make_mem_undefined_w_otag
) ( Addr a
, SizeT len
, UInt otag
)
1916 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG
);
1917 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a
, len
);
1918 set_address_range_perms ( a
, len
, VA_BITS16_UNDEFINED
, SM_DIST_UNDEFINED
);
1919 if (UNLIKELY( MC_(clo_mc_level
) == 3 ))
1920 ocache_sarp_Set_Origins ( a
, len
, otag
);
1924 void make_mem_undefined_w_tid_and_okind ( Addr a
, SizeT len
,
1925 ThreadId tid
, UInt okind
)
1929 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1930 if it is invalid. So no need to do it here. */
1931 tl_assert(okind
<= 3);
1932 here
= VG_(record_ExeContext
)( tid
, 0/*first_ip_delta*/ );
1934 ecu
= VG_(get_ECU_from_ExeContext
)(here
);
1935 tl_assert(VG_(is_plausible_ECU
)(ecu
));
1936 MC_(make_mem_undefined_w_otag
) ( a
, len
, ecu
| okind
);
1940 void mc_new_mem_w_tid_make_ECU ( Addr a
, SizeT len
, ThreadId tid
)
1942 make_mem_undefined_w_tid_and_okind ( a
, len
, tid
, MC_OKIND_UNKNOWN
);
1946 void mc_new_mem_w_tid_no_ECU ( Addr a
, SizeT len
, ThreadId tid
)
1948 MC_(make_mem_undefined_w_otag
) ( a
, len
, MC_OKIND_UNKNOWN
);
1951 void MC_(make_mem_defined
) ( Addr a
, SizeT len
)
1953 PROF_EVENT(MCPE_MAKE_MEM_DEFINED
);
1954 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a
, len
);
1955 set_address_range_perms ( a
, len
, VA_BITS16_DEFINED
, SM_DIST_DEFINED
);
1956 if (UNLIKELY( MC_(clo_mc_level
) == 3 ))
1957 ocache_sarp_Clear_Origins ( a
, len
);
1960 __attribute__((unused
))
1961 static void make_mem_defined_w_tid ( Addr a
, SizeT len
, ThreadId tid
)
1963 MC_(make_mem_defined
)(a
, len
);
1966 /* For each byte in [a,a+len), if the byte is addressable, make it be
1967 defined, but if it isn't addressible, leave it alone. In other
1968 words a version of MC_(make_mem_defined) that doesn't mess with
1969 addressibility. Low-performance implementation. */
1970 static void make_mem_defined_if_addressable ( Addr a
, SizeT len
)
1974 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a
, (ULong
)len
);
1975 for (i
= 0; i
< len
; i
++) {
1976 vabits2
= get_vabits2( a
+i
);
1977 if (LIKELY(VA_BITS2_NOACCESS
!= vabits2
)) {
1978 set_vabits2(a
+i
, VA_BITS2_DEFINED
);
1979 if (UNLIKELY(MC_(clo_mc_level
) >= 3)) {
1980 MC_(helperc_b_store1
)( a
+i
, 0 ); /* clear the origin tag */
1986 /* Similarly (needed for mprotect handling ..) */
1987 static void make_mem_defined_if_noaccess ( Addr a
, SizeT len
)
1991 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a
, (ULong
)len
);
1992 for (i
= 0; i
< len
; i
++) {
1993 vabits2
= get_vabits2( a
+i
);
1994 if (LIKELY(VA_BITS2_NOACCESS
== vabits2
)) {
1995 set_vabits2(a
+i
, VA_BITS2_DEFINED
);
1996 if (UNLIKELY(MC_(clo_mc_level
) >= 3)) {
1997 MC_(helperc_b_store1
)( a
+i
, 0 ); /* clear the origin tag */
2003 /* --- Block-copy permissions (needed for implementing realloc() and
2006 void MC_(copy_address_range_state
) ( Addr src
, Addr dst
, SizeT len
)
2009 UChar vabits2
, vabits8
;
2010 Bool aligned
, nooverlap
;
2012 DEBUG("MC_(copy_address_range_state)\n");
2013 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE
);
2015 if (len
== 0 || src
== dst
)
2018 aligned
= VG_IS_4_ALIGNED(src
) && VG_IS_4_ALIGNED(dst
);
2019 nooverlap
= src
+len
<= dst
|| dst
+len
<= src
;
2021 if (nooverlap
&& aligned
) {
2023 /* Vectorised fast case, when no overlap and suitably aligned */
2027 vabits8
= get_vabits8_for_aligned_word32( src
+i
);
2028 set_vabits8_for_aligned_word32( dst
+i
, vabits8
);
2029 if (LIKELY(VA_BITS8_DEFINED
== vabits8
2030 || VA_BITS8_UNDEFINED
== vabits8
2031 || VA_BITS8_NOACCESS
== vabits8
)) {
2034 /* have to copy secondary map info */
2035 if (VA_BITS2_PARTDEFINED
== get_vabits2( src
+i
+0 ))
2036 set_sec_vbits8( dst
+i
+0, get_sec_vbits8( src
+i
+0 ) );
2037 if (VA_BITS2_PARTDEFINED
== get_vabits2( src
+i
+1 ))
2038 set_sec_vbits8( dst
+i
+1, get_sec_vbits8( src
+i
+1 ) );
2039 if (VA_BITS2_PARTDEFINED
== get_vabits2( src
+i
+2 ))
2040 set_sec_vbits8( dst
+i
+2, get_sec_vbits8( src
+i
+2 ) );
2041 if (VA_BITS2_PARTDEFINED
== get_vabits2( src
+i
+3 ))
2042 set_sec_vbits8( dst
+i
+3, get_sec_vbits8( src
+i
+3 ) );
2049 vabits2
= get_vabits2( src
+i
);
2050 set_vabits2( dst
+i
, vabits2
);
2051 if (VA_BITS2_PARTDEFINED
== vabits2
) {
2052 set_sec_vbits8( dst
+i
, get_sec_vbits8( src
+i
) );
2060 /* We have to do things the slow way */
2062 for (i
= 0, j
= len
-1; i
< len
; i
++, j
--) {
2063 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1
);
2064 vabits2
= get_vabits2( src
+j
);
2065 set_vabits2( dst
+j
, vabits2
);
2066 if (VA_BITS2_PARTDEFINED
== vabits2
) {
2067 set_sec_vbits8( dst
+j
, get_sec_vbits8( src
+j
) );
2073 for (i
= 0; i
< len
; i
++) {
2074 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2
);
2075 vabits2
= get_vabits2( src
+i
);
2076 set_vabits2( dst
+i
, vabits2
);
2077 if (VA_BITS2_PARTDEFINED
== vabits2
) {
2078 set_sec_vbits8( dst
+i
, get_sec_vbits8( src
+i
) );
2087 /*------------------------------------------------------------*/
2088 /*--- Origin tracking stuff - cache basics ---*/
2089 /*------------------------------------------------------------*/
2091 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
2092 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2094 Note that this implementation draws inspiration from the "origin
2095 tracking by value piggybacking" scheme described in "Tracking Bad
2096 Apples: Reporting the Origin of Null and Undefined Value Errors"
2097 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
2098 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
2099 implemented completely differently.
2101 Origin tags and ECUs -- about the shadow values
2102 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2104 This implementation tracks the defining point of all uninitialised
2105 values using so called "origin tags", which are 32-bit integers,
2106 rather than using the values themselves to encode the origins.  The
2107 latter, so-called "value piggybacking", is what the OOPSLA07 paper
2110 Origin tags, as tracked by the machinery below, are 32-bit unsigned
2111 ints (UInts), regardless of the machine's word size. Each tag
2112 comprises an upper 30-bit ECU field and a lower 2-bit
2113 'kind' field. The ECU field is a number given out by m_execontext
2114 and has a 1-1 mapping with ExeContext*s. An ECU can be used
2115 directly as an origin tag (otag), but in fact we want to put
2116 additional information 'kind' field to indicate roughly where the
2117 tag came from. This helps print more understandable error messages
2118 for the user -- it has no other purpose. In summary:
2120 * Both ECUs and origin tags are represented as 32-bit words
2122 * m_execontext and the core-tool interface deal purely in ECUs.
2123 They have no knowledge of origin tags - that is a purely
2124 Memcheck-internal matter.
2126 * all valid ECUs have the lowest 2 bits zero and at least
2127 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2129 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2130 constants defined in mc_include.h.
2132 * to convert an otag back to an ECU, AND it with ~3
2134 One important fact is that no valid otag is zero. A zero otag is
2135 used by the implementation to indicate "no origin", which could
2136 mean that either the value is defined, or it is undefined but the
2137 implementation somehow managed to lose the origin.
2139 The ECU used for memory created by malloc etc is derived from the
2140 stack trace at the time the malloc etc happens. This means the
2141 mechanism can show the exact allocation point for heap-created
2142 uninitialised values.
2144 In contrast, it is simply too expensive to create a complete
2145 backtrace for each stack allocation. Therefore we merely use a
2146 depth-1 backtrace for stack allocations, which can be done once at
2147 translation time, rather than N times at run time. The result of
2148 this is that, for stack created uninitialised values, Memcheck can
2149 only show the allocating function, and not what called it.
2150 Furthermore, compilers tend to move the stack pointer just once at
2151 the start of the function, to allocate all locals, and so in fact
2152 the stack origin almost always simply points to the opening brace
2153 of the function. Net result is, for stack origins, the mechanism
2154 can tell you in which function the undefined value was created, but
2155 that's all. Users will need to carefully check all locals in the
2158 Shadowing registers and memory
2159 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2161 Memory is shadowed using a two level cache structure (ocacheL1 and
2162 ocacheL2). Memory references are first directed to ocacheL1. This
2163 is a traditional 2-way set associative cache with 32-byte lines and
2164 approximate LRU replacement within each set.
2166 A naive implementation would require storing one 32 bit otag for
2167 each byte of memory covered, a 4:1 space overhead. Instead, there
2168 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2169 that shows which of the 4 bytes have that shadow value and which
2170 have a shadow value of zero (indicating no origin). Hence a lot of
2171 space is saved, but the cost is that only one different origin per
2172 4 bytes of address space can be represented. This is a source of
2173 imprecision, but how much of a problem it really is remains to be
2176 A cache line that contains all zeroes ("no origins") contains no
2177 useful information, and can be ejected from the L1 cache "for
2178 free", in the sense that a read miss on the L1 causes a line of
2179 zeroes to be installed. However, ejecting a line containing
2180 nonzeroes risks losing origin information permanently. In order to
2181 prevent such lossage, ejected nonzero lines are placed in a
2182 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2183 lines. This can grow arbitrarily large, and so should ensure that
2184 Memcheck runs out of memory in preference to losing useful origin
2185 info due to cache size limitations.
2187 Shadowing registers is a bit tricky, because the shadow values are
2188 32 bits, regardless of the size of the register. That gives a
2189 problem for registers smaller than 32 bits. The solution is to
2190 find spaces in the guest state that are unused, and use those to
2191 shadow guest state fragments smaller than 32 bits. For example, on
2192 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
2193 shadow are allocated for the register's otag, then there are still
2194 12 bytes left over which could be used to shadow 3 other values.
2196 This implies there is some non-obvious mapping from guest state
2197 (start,length) pairs to the relevant shadow offset (for the origin
2198 tags). And it is unfortunately guest-architecture specific. The
2199 mapping is contained in mc_machine.c, which is quite lengthy but
2202 Instrumenting the IR
2203 ~~~~~~~~~~~~~~~~~~~~
2205 Instrumentation is largely straightforward, and done by the
2206 functions schemeE and schemeS in mc_translate.c. These generate
2207 code for handling the origin tags of expressions (E) and statements
2208 (S) respectively. The rather strange names are a reference to the
2209 "compilation schemes" shown in Simon Peyton Jones' book "The
2210 Implementation of Functional Programming Languages" (Prentice Hall,
2212 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2214 schemeS merely arranges to move shadow values around the guest
2215 state to track the incoming IR. schemeE is largely trivial too.
2216 The only significant point is how to compute the otag corresponding
2217 to binary (or ternary, quaternary, etc) operator applications. The
2218 rule is simple: just take whichever value is larger (32-bit
2219 unsigned max). Constants get the special value zero. Hence this
2220 rule always propagates a nonzero (known) otag in preference to a
2221 zero (unknown, or more likely, value-is-defined) tag, as we want.
2222 If two different undefined values are inputs to a binary operator
2223 application, then which is propagated is arbitrary, but that
2224 doesn't matter, since the program is erroneous in using either of
2225 the values, and so there's no point in attempting to propagate
2228 Since constants are abstracted to (otag) zero, much of the
2229 instrumentation code can be folded out without difficulty by the
2230 generic post-instrumentation IR cleanup pass, using these rules:
2231 Max32U(0,x) -> x, Max32U(x,0) -> x, Max32(x,y) where x and y are
2232 constants is evaluated at JIT time. And the resulting dead code
2233 removal. In practice this causes surprisingly few Max32Us to
2234 survive through to backend code generation.
2236 Integration with the V-bits machinery
2237 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2239 This is again largely straightforward. Mostly the otag and V bits
2240 stuff are independent. The only point of interaction is when the V
2241 bits instrumenter creates a call to a helper function to report an
2242 uninitialised value error -- in that case it must first use schemeE
2243 to get hold of the origin tag expression for the value, and pass
2244 that to the helper too.
2246 There is the usual stuff to do with setting address range
2247 permissions. When memory is painted undefined, we must also know
2248 the origin tag to paint with, which involves some tedious plumbing,
2249 particularly to do with the fast case stack handlers. When memory
2250 is painted defined or noaccess then the origin tags must be forced
2253 One of the goals of the implementation was to ensure that the
2254 non-origin tracking mode isn't slowed down at all. To do this,
2255 various functions to do with memory permissions setting (again,
2256 mostly pertaining to the stack) are duplicated for the with- and
2259 Dealing with stack redzones, and the NIA cache
2260 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2262 This is one of the few non-obvious parts of the implementation.
2264 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2265 reserved area below the stack pointer, that can be used as scratch
2266 space by compiler generated code for functions. In the Memcheck
2267 sources this is referred to as the "stack redzone". The important
2268 thing here is that such redzones are considered volatile across
2269 function calls and returns. So Memcheck takes care to mark them as
2270 undefined for each call and return, on the afflicted platforms.
2271 Past experience shows this is essential in order to get reliable
2272 messages about uninitialised values that come from the stack.
2274 So the question is, when we paint a redzone undefined, what origin
2275 tag should we use for it? Consider a function f() calling g(). If
2276 we paint the redzone using an otag derived from the ExeContext of
2277 the CALL/BL instruction in f, then any errors in g causing it to
2278 use uninitialised values that happen to lie in the redzone, will be
2279 reported as having their origin in f. Which is highly confusing.
2281 The same applies for returns: if, on a return, we paint the redzone
2282 using a origin tag derived from the ExeContext of the RET/BLR
2283 instruction in g, then any later errors in f causing it to use
2284 uninitialised values in the redzone, will be reported as having
2285 their origin in g. Which is just as confusing.
2287 To do it right, in both cases we need to use an origin tag which
2288 pertains to the instruction which dynamically follows the CALL/BL
2289 or RET/BLR. In short, one derived from the NIA - the "next
2290 instruction address".
2292 To make this work, Memcheck's redzone-painting helper,
2293 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2294 NIA. It converts the NIA to a 1-element ExeContext, and uses that
2295 ExeContext's ECU as the basis for the otag used to paint the
2296 redzone. The expensive part of this is converting an NIA into an
2297 ECU, since this happens once for every call and every return. So
2298 we use a simple 511-line, 2-way set associative cache
2299 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2302 Further background comments
2303 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2305 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
2306 > it really just the address of the relevant ExeContext?
2308 Well, it's not the address, but a value which has a 1-1 mapping
2309 with ExeContexts, and is guaranteed not to be zero, since zero
2310 denotes (to memcheck) "unknown origin or defined value". So these
2311 UInts are just numbers starting at 4 and incrementing by 4; each
2312 ExeContext is given a number when it is created. (*** NOTE this
2313 confuses otags and ECUs; see comments above ***).
2315 Making these otags 32-bit regardless of the machine's word size
2316 makes the 64-bit implementation easier (next para). And it doesn't
2317 really limit us in any way, since for the tags to overflow would
2318 require that the program somehow caused 2^30-1 different
2319 ExeContexts to be created, in which case it is probably in deep
2320 trouble. Not to mention V will have soaked up many tens of
2321 gigabytes of memory merely to store them all.
2323 So having 64-bit origins doesn't really buy you anything, and has
2324 the following downsides:
2326 Suppose that instead, an otag is a UWord. This would mean that, on
2329 1. It becomes hard to shadow any element of guest state which is
2330 smaller than 8 bytes. To do so means you'd need to find some
2331 8-byte-sized hole in the guest state which you don't want to
2332 shadow, and use that instead to hold the otag. On ppc64, the
2333 condition code register(s) are split into 20 UChar sized pieces,
2334 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2335 and so that would entail finding 160 bytes somewhere else in the
2338 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2339 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2340 same) and so I had to look for 4 untracked otag-sized areas in
2341 the guest state to make that possible.
2343 The same problem exists of course when origin tags are only 32
2344 bits, but it's less extreme.
2346 2. (More compelling) it doubles the size of the origin shadow
2347 memory. Given that the shadow memory is organised as a fixed
2348 size cache, and that accuracy of tracking is limited by origins
2349 falling out the cache due to space conflicts, this isn't good.
2351 > Another question: is the origin tracking perfect, or are there
2352 > cases where it fails to determine an origin?
2354 It is imperfect for at least for the following reasons, and
2357 * Insufficient capacity in the origin cache. When a line is
2358 evicted from the cache it is gone forever, and so subsequent
2359 queries for the line produce zero, indicating no origin
2360 information. Interestingly, a line containing all zeroes can be
2361 evicted "free" from the cache, since it contains no useful
2362 information, so there is scope perhaps for some cleverer cache
2363 management schemes. (*** NOTE, with the introduction of the
2364 second level origin tag cache, ocacheL2, this is no longer a
2367 * The origin cache only stores one otag per 32-bits of address
2368 space, plus 4 bits indicating which of the 4 bytes has that tag
2369 and which are considered defined. The result is that if two
2370 undefined bytes in the same word are stored in memory, the first
2371 stored byte's origin will be lost and replaced by the origin for
2374 * Nonzero origin tags for defined values. Consider a binary
2375 operator application op(x,y). Suppose y is undefined (and so has
2376 a valid nonzero origin tag), and x is defined, but erroneously
2377 has a nonzero origin tag (defined values should have tag zero).
2378 If the erroneous tag has a numeric value greater than y's tag,
2379 then the rule for propagating origin tags though binary
2380 operations, which is simply to take the unsigned max of the two
2381 tags, will erroneously propagate x's tag rather than y's.
2383 * Some obscure uses of x86/amd64 byte registers can cause lossage
2384 or confusion of origins. %AH .. %DH are treated as different
2385 from, and unrelated to, their parent registers, %EAX .. %EDX.
2386 So some weird sequences like
2388 movb undefined-value, %AH
2389 movb defined-value, %AL
2390 .. use %AX or %EAX ..
2392 will cause the origin attributed to %AH to be ignored, since %AL,
2393 %AX, %EAX are treated as the same register, and %AH as a
2394 completely separate one.
2396 But having said all that, it actually seems to work fairly well in
2400 static UWord stats_ocacheL1_find
= 0;
2401 static UWord stats_ocacheL1_found_at_1
= 0;
2402 static UWord stats_ocacheL1_found_at_N
= 0;
2403 static UWord stats_ocacheL1_misses
= 0;
2404 static UWord stats_ocacheL1_lossage
= 0;
2405 static UWord stats_ocacheL1_movefwds
= 0;
2407 static UWord stats__ocacheL2_refs
= 0;
2408 static UWord stats__ocacheL2_misses
= 0;
2409 static UWord stats__ocacheL2_n_nodes_max
= 0;
2411 /* Cache of 32-bit values, one every 32 bits of address space */
2413 #define OC_BITS_PER_LINE 5
2414 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2416 static INLINE UWord
oc_line_offset ( Addr a
) {
2417 return (a
>> 2) & (OC_W32S_PER_LINE
- 1);
2419 static INLINE Bool
is_valid_oc_tag ( Addr tag
) {
2420 return 0 == (tag
& ((1 << OC_BITS_PER_LINE
) - 1));
/* 2-way set associative L1 origin cache geometry. */
#define OC_LINES_PER_SET 2

#define OC_N_SET_BITS    20
#define OC_N_SETS        (1 << OC_N_SET_BITS)

/* These settings give:
   64 bit host: ocache:  100,663,296 sizeB  67,108,864 useful
   32 bit host: ocache:   92,274,688 sizeB  67,108,864 useful
*/

/* On a hit at line >= 1, promote the line towards the front of its
   set once every 2^OC_MOVE_FORWARDS_EVERY_BITS hits (approx LRU). */
#define OC_MOVE_FORWARDS_EVERY_BITS 7
2439 UInt w32
[OC_W32S_PER_LINE
];
2440 UChar descr
[OC_W32S_PER_LINE
];
2444 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2445 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2446 and 'z' if all the represented tags are zero. */
2447 static UChar
classify_OCacheLine ( OCacheLine
* line
)
2450 if (line
->tag
== 1/*invalid*/)
2451 return 'e'; /* EMPTY */
2452 tl_assert(is_valid_oc_tag(line
->tag
));
2453 for (i
= 0; i
< OC_W32S_PER_LINE
; i
++) {
2454 tl_assert(0 == ((~0xF) & line
->descr
[i
]));
2455 if (line
->w32
[i
] > 0 && line
->descr
[i
] > 0)
2456 return 'n'; /* NONZERO - contains useful info */
2458 return 'z'; /* ZERO - no useful info */
2463 OCacheLine line
[OC_LINES_PER_SET
];
2469 OCacheSet set
[OC_N_SETS
];
2473 static OCache
* ocacheL1
= NULL
;
2474 static UWord ocacheL1_event_ctr
= 0;
2476 static void init_ocacheL2 ( void ); /* fwds */
2477 static void init_OCache ( void )
2480 tl_assert(MC_(clo_mc_level
) >= 3);
2481 tl_assert(ocacheL1
== NULL
);
2482 ocacheL1
= VG_(am_shadow_alloc
)(sizeof(OCache
));
2483 if (ocacheL1
== NULL
) {
2484 VG_(out_of_memory_NORETURN
)( "memcheck:allocating ocacheL1",
2487 tl_assert(ocacheL1
!= NULL
);
2488 for (set
= 0; set
< OC_N_SETS
; set
++) {
2489 for (line
= 0; line
< OC_LINES_PER_SET
; line
++) {
2490 ocacheL1
->set
[set
].line
[line
].tag
= 1/*invalid*/;
2496 static void moveLineForwards ( OCacheSet
* set
, UWord lineno
)
2499 stats_ocacheL1_movefwds
++;
2500 tl_assert(lineno
> 0 && lineno
< OC_LINES_PER_SET
);
2501 tmp
= set
->line
[lineno
-1];
2502 set
->line
[lineno
-1] = set
->line
[lineno
];
2503 set
->line
[lineno
] = tmp
;
2506 static void zeroise_OCacheLine ( OCacheLine
* line
, Addr tag
) {
2508 for (i
= 0; i
< OC_W32S_PER_LINE
; i
++) {
2509 line
->w32
[i
] = 0; /* NO ORIGIN */
2510 line
->descr
[i
] = 0; /* REALLY REALLY NO ORIGIN! */
2515 //////////////////////////////////////////////////////////////
2516 //// OCache backing store
2518 static OSet
* ocacheL2
= NULL
;
2520 static void* ocacheL2_malloc ( const HChar
* cc
, SizeT szB
) {
2521 return VG_(malloc
)(cc
, szB
);
2523 static void ocacheL2_free ( void* v
) {
2527 /* Stats: # nodes currently in tree */
2528 static UWord stats__ocacheL2_n_nodes
= 0;
2530 static void init_ocacheL2 ( void )
2532 tl_assert(!ocacheL2
);
2533 tl_assert(sizeof(Word
) == sizeof(Addr
)); /* since OCacheLine.tag :: Addr */
2534 tl_assert(0 == offsetof(OCacheLine
,tag
));
2536 = VG_(OSetGen_Create
)( offsetof(OCacheLine
,tag
),
2537 NULL
, /* fast cmp */
2538 ocacheL2_malloc
, "mc.ioL2", ocacheL2_free
);
2539 stats__ocacheL2_n_nodes
= 0;
2542 /* Find line with the given tag in the tree, or NULL if not found. */
2543 static OCacheLine
* ocacheL2_find_tag ( Addr tag
)
2546 tl_assert(is_valid_oc_tag(tag
));
2547 stats__ocacheL2_refs
++;
2548 line
= VG_(OSetGen_Lookup
)( ocacheL2
, &tag
);
2552 /* Delete the line with the given tag from the tree, if it is present, and
2553 free up the associated memory. */
2554 static void ocacheL2_del_tag ( Addr tag
)
2557 tl_assert(is_valid_oc_tag(tag
));
2558 stats__ocacheL2_refs
++;
2559 line
= VG_(OSetGen_Remove
)( ocacheL2
, &tag
);
2561 VG_(OSetGen_FreeNode
)(ocacheL2
, line
);
2562 tl_assert(stats__ocacheL2_n_nodes
> 0);
2563 stats__ocacheL2_n_nodes
--;
2567 /* Add a copy of the given line to the tree. It must not already be
2569 static void ocacheL2_add_line ( OCacheLine
* line
)
2572 tl_assert(is_valid_oc_tag(line
->tag
));
2573 copy
= VG_(OSetGen_AllocNode
)( ocacheL2
, sizeof(OCacheLine
) );
2575 stats__ocacheL2_refs
++;
2576 VG_(OSetGen_Insert
)( ocacheL2
, copy
);
2577 stats__ocacheL2_n_nodes
++;
2578 if (stats__ocacheL2_n_nodes
> stats__ocacheL2_n_nodes_max
)
2579 stats__ocacheL2_n_nodes_max
= stats__ocacheL2_n_nodes
;
2583 //////////////////////////////////////////////////////////////
2585 __attribute__((noinline
))
2586 static OCacheLine
* find_OCacheLine_SLOW ( Addr a
)
2588 OCacheLine
*victim
, *inL2
;
2591 UWord setno
= (a
>> OC_BITS_PER_LINE
) & (OC_N_SETS
- 1);
2592 UWord tagmask
= ~((1 << OC_BITS_PER_LINE
) - 1);
2593 UWord tag
= a
& tagmask
;
2594 tl_assert(setno
>= 0 && setno
< OC_N_SETS
);
2596 /* we already tried line == 0; skip therefore. */
2597 for (line
= 1; line
< OC_LINES_PER_SET
; line
++) {
2598 if (ocacheL1
->set
[setno
].line
[line
].tag
== tag
) {
2600 stats_ocacheL1_found_at_1
++;
2602 stats_ocacheL1_found_at_N
++;
2604 if (UNLIKELY(0 == (ocacheL1_event_ctr
++
2605 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS
)-1)))) {
2606 moveLineForwards( &ocacheL1
->set
[setno
], line
);
2609 return &ocacheL1
->set
[setno
].line
[line
];
2613 /* A miss. Use the last slot. Implicitly this means we're
2614 ejecting the line in the last slot. */
2615 stats_ocacheL1_misses
++;
2616 tl_assert(line
== OC_LINES_PER_SET
);
2618 tl_assert(line
> 0);
2620 /* First, move the to-be-ejected line to the L2 cache. */
2621 victim
= &ocacheL1
->set
[setno
].line
[line
];
2622 c
= classify_OCacheLine(victim
);
2625 /* the line is empty (has invalid tag); ignore it. */
2628 /* line contains zeroes. We must ensure the backing store is
2629 updated accordingly, either by copying the line there
2630 verbatim, or by ensuring it isn't present there. We
2631 chosse the latter on the basis that it reduces the size of
2632 the backing store. */
2633 ocacheL2_del_tag( victim
->tag
);
2636 /* line contains at least one real, useful origin. Copy it
2637 to the backing store. */
2638 stats_ocacheL1_lossage
++;
2639 inL2
= ocacheL2_find_tag( victim
->tag
);
2643 ocacheL2_add_line( victim
);
2650 /* Now we must reload the L1 cache from the backing tree, if
2652 tl_assert(tag
!= victim
->tag
); /* stay sane */
2653 inL2
= ocacheL2_find_tag( tag
);
2655 /* We're in luck. It's in the L2. */
2656 ocacheL1
->set
[setno
].line
[line
] = *inL2
;
2658 /* Missed at both levels of the cache hierarchy. We have to
2659 declare it as full of zeroes (unknown origins). */
2660 stats__ocacheL2_misses
++;
2661 zeroise_OCacheLine( &ocacheL1
->set
[setno
].line
[line
], tag
);
2664 /* Move it one forwards */
2665 moveLineForwards( &ocacheL1
->set
[setno
], line
);
2668 return &ocacheL1
->set
[setno
].line
[line
];
2671 static INLINE OCacheLine
* find_OCacheLine ( Addr a
)
2673 UWord setno
= (a
>> OC_BITS_PER_LINE
) & (OC_N_SETS
- 1);
2674 UWord tagmask
= ~((1 << OC_BITS_PER_LINE
) - 1);
2675 UWord tag
= a
& tagmask
;
2677 stats_ocacheL1_find
++;
2679 if (OC_ENABLE_ASSERTIONS
) {
2680 tl_assert(setno
>= 0 && setno
< OC_N_SETS
);
2681 tl_assert(0 == (tag
& (4 * OC_W32S_PER_LINE
- 1)));
2684 if (LIKELY(ocacheL1
->set
[setno
].line
[0].tag
== tag
)) {
2685 return &ocacheL1
->set
[setno
].line
[0];
2688 return find_OCacheLine_SLOW( a
);
2691 static INLINE
void set_aligned_word64_Origin_to_undef ( Addr a
, UInt otag
)
2693 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2694 //// Set the origins for a+0 .. a+7
2696 UWord lineoff
= oc_line_offset(a
);
2697 if (OC_ENABLE_ASSERTIONS
) {
2698 tl_assert(lineoff
>= 0
2699 && lineoff
< OC_W32S_PER_LINE
-1/*'cos 8-aligned*/);
2701 line
= find_OCacheLine( a
);
2702 line
->descr
[lineoff
+0] = 0xF;
2703 line
->descr
[lineoff
+1] = 0xF;
2704 line
->w32
[lineoff
+0] = otag
;
2705 line
->w32
[lineoff
+1] = otag
;
2707 //// END inlined, specialised version of MC_(helperc_b_store8)
2711 /*------------------------------------------------------------*/
2712 /*--- Aligned fast case permission setters, ---*/
2713 /*--- for dealing with stacks ---*/
2714 /*------------------------------------------------------------*/
2716 /*--------------------- 32-bit ---------------------*/
2718 /* Nb: by "aligned" here we mean 4-byte aligned */
2720 static INLINE
void make_aligned_word32_undefined ( Addr a
)
2722 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED
);
2724 #ifndef PERF_FAST_STACK2
2725 make_mem_undefined(a
, 4);
2731 if (UNLIKELY(a
> MAX_PRIMARY_ADDRESS
)) {
2732 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW
);
2733 make_mem_undefined(a
, 4);
2737 sm
= get_secmap_for_writing_low(a
);
2739 sm
->vabits8
[sm_off
] = VA_BITS8_UNDEFINED
;
2745 void make_aligned_word32_undefined_w_otag ( Addr a
, UInt otag
)
2747 make_aligned_word32_undefined(a
);
2748 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2749 //// Set the origins for a+0 .. a+3
2751 UWord lineoff
= oc_line_offset(a
);
2752 if (OC_ENABLE_ASSERTIONS
) {
2753 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
2755 line
= find_OCacheLine( a
);
2756 line
->descr
[lineoff
] = 0xF;
2757 line
->w32
[lineoff
] = otag
;
2759 //// END inlined, specialised version of MC_(helperc_b_store4)
2763 void make_aligned_word32_noaccess ( Addr a
)
2765 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS
);
2767 #ifndef PERF_FAST_STACK2
2768 MC_(make_mem_noaccess
)(a
, 4);
2774 if (UNLIKELY(a
> MAX_PRIMARY_ADDRESS
)) {
2775 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW
);
2776 MC_(make_mem_noaccess
)(a
, 4);
2780 sm
= get_secmap_for_writing_low(a
);
2782 sm
->vabits8
[sm_off
] = VA_BITS8_NOACCESS
;
2784 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2785 //// Set the origins for a+0 .. a+3.
2786 if (UNLIKELY( MC_(clo_mc_level
) == 3 )) {
2788 UWord lineoff
= oc_line_offset(a
);
2789 if (OC_ENABLE_ASSERTIONS
) {
2790 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
2792 line
= find_OCacheLine( a
);
2793 line
->descr
[lineoff
] = 0;
2795 //// END inlined, specialised version of MC_(helperc_b_store4)
2800 /*--------------------- 64-bit ---------------------*/
2802 /* Nb: by "aligned" here we mean 8-byte aligned */
2804 static INLINE
void make_aligned_word64_undefined ( Addr a
)
2806 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED
);
2808 #ifndef PERF_FAST_STACK2
2809 make_mem_undefined(a
, 8);
2815 if (UNLIKELY(a
> MAX_PRIMARY_ADDRESS
)) {
2816 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW
);
2817 make_mem_undefined(a
, 8);
2821 sm
= get_secmap_for_writing_low(a
);
2822 sm_off16
= SM_OFF_16(a
);
2823 sm
->vabits16
[sm_off16
] = VA_BITS16_UNDEFINED
;
2829 void make_aligned_word64_undefined_w_otag ( Addr a
, UInt otag
)
2831 make_aligned_word64_undefined(a
);
2832 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2833 //// Set the origins for a+0 .. a+7
2835 UWord lineoff
= oc_line_offset(a
);
2836 tl_assert(lineoff
>= 0
2837 && lineoff
< OC_W32S_PER_LINE
-1/*'cos 8-aligned*/);
2838 line
= find_OCacheLine( a
);
2839 line
->descr
[lineoff
+0] = 0xF;
2840 line
->descr
[lineoff
+1] = 0xF;
2841 line
->w32
[lineoff
+0] = otag
;
2842 line
->w32
[lineoff
+1] = otag
;
2844 //// END inlined, specialised version of MC_(helperc_b_store8)
2848 void make_aligned_word64_noaccess ( Addr a
)
2850 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS
);
2852 #ifndef PERF_FAST_STACK2
2853 MC_(make_mem_noaccess
)(a
, 8);
2859 if (UNLIKELY(a
> MAX_PRIMARY_ADDRESS
)) {
2860 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW
);
2861 MC_(make_mem_noaccess
)(a
, 8);
2865 sm
= get_secmap_for_writing_low(a
);
2866 sm_off16
= SM_OFF_16(a
);
2867 sm
->vabits16
[sm_off16
] = VA_BITS16_NOACCESS
;
2869 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2870 //// Clear the origins for a+0 .. a+7.
2871 if (UNLIKELY( MC_(clo_mc_level
) == 3 )) {
2873 UWord lineoff
= oc_line_offset(a
);
2874 tl_assert(lineoff
>= 0
2875 && lineoff
< OC_W32S_PER_LINE
-1/*'cos 8-aligned*/);
2876 line
= find_OCacheLine( a
);
2877 line
->descr
[lineoff
+0] = 0;
2878 line
->descr
[lineoff
+1] = 0;
2880 //// END inlined, specialised version of MC_(helperc_b_store8)
2886 /*------------------------------------------------------------*/
2887 /*--- Stack pointer adjustment ---*/
2888 /*------------------------------------------------------------*/
2890 #ifdef PERF_FAST_STACK
2893 # define MAYBE_USED __attribute__((unused))
2896 /*--------------- adjustment by 4 bytes ---------------*/
2899 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP
, UInt ecu
)
2901 UInt otag
= ecu
| MC_OKIND_STACK
;
2902 PROF_EVENT(MCPE_NEW_MEM_STACK_4
);
2903 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2904 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
2906 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 4, otag
);
2911 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP
)
2913 PROF_EVENT(MCPE_NEW_MEM_STACK_4
);
2914 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2915 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
2917 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 4 );
2922 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP
)
2924 PROF_EVENT(MCPE_DIE_MEM_STACK_4
);
2925 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2926 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
2928 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-4, 4 );
2932 /*--------------- adjustment by 8 bytes ---------------*/
2935 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP
, UInt ecu
)
2937 UInt otag
= ecu
| MC_OKIND_STACK
;
2938 PROF_EVENT(MCPE_NEW_MEM_STACK_8
);
2939 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2940 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
2941 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2942 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
2943 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+4, otag
);
2945 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 8, otag
);
2950 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP
)
2952 PROF_EVENT(MCPE_NEW_MEM_STACK_8
);
2953 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2954 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
2955 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2956 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
2957 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 );
2959 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 8 );
2964 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP
)
2966 PROF_EVENT(MCPE_DIE_MEM_STACK_8
);
2967 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2968 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-8 );
2969 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2970 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-8 );
2971 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
2973 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-8, 8 );
2977 /*--------------- adjustment by 12 bytes ---------------*/
2980 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP
, UInt ecu
)
2982 UInt otag
= ecu
| MC_OKIND_STACK
;
2983 PROF_EVENT(MCPE_NEW_MEM_STACK_12
);
2984 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2985 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
2986 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8, otag
);
2987 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
2988 /* from previous test we don't have 8-alignment at offset +0,
2989 hence must have 8 alignment at offsets +4/-4. Hence safe to
2990 do 4 at +0 and then 8 at +4/. */
2991 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
2992 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+4, otag
);
2994 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 12, otag
);
2999 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP
)
3001 PROF_EVENT(MCPE_NEW_MEM_STACK_12
);
3002 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3003 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3004 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3005 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3006 /* from previous test we don't have 8-alignment at offset +0,
3007 hence must have 8 alignment at offsets +4/-4. Hence safe to
3008 do 4 at +0 and then 8 at +4/. */
3009 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3010 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 );
3012 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 12 );
3017 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP
)
3019 PROF_EVENT(MCPE_DIE_MEM_STACK_12
);
3020 /* Note the -12 in the test */
3021 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
-12 )) {
3022 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
3024 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-12 );
3025 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
3026 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3027 /* We have 4-alignment at +0, but we don't have 8-alignment at
3028 -12. So we must have 8-alignment at -8. Hence do 4 at -12
3029 and then 8 at -8. */
3030 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-12 );
3031 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-8 );
3033 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-12, 12 );
3037 /*--------------- adjustment by 16 bytes ---------------*/
3040 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP
, UInt ecu
)
3042 UInt otag
= ecu
| MC_OKIND_STACK
;
3043 PROF_EVENT(MCPE_NEW_MEM_STACK_16
);
3044 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3045 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3046 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3047 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8, otag
);
3048 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3049 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3050 Hence do 4 at +0, 8 at +4, 4 at +12. */
3051 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3052 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 , otag
);
3053 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+12, otag
);
3055 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 16, otag
);
3060 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP
)
3062 PROF_EVENT(MCPE_NEW_MEM_STACK_16
);
3063 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3064 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3065 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3066 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3067 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3068 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3069 Hence do 4 at +0, 8 at +4, 4 at +12. */
3070 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3071 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 );
3072 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+12 );
3074 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 16 );
3079 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP
)
3081 PROF_EVENT(MCPE_DIE_MEM_STACK_16
);
3082 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3083 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
3084 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3085 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-8 );
3086 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3087 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
3088 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3089 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-12 );
3090 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
3092 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-16, 16 );
3096 /*--------------- adjustment by 32 bytes ---------------*/
3099 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP
, UInt ecu
)
3101 UInt otag
= ecu
| MC_OKIND_STACK
;
3102 PROF_EVENT(MCPE_NEW_MEM_STACK_32
);
3103 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3104 /* Straightforward */
3105 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3106 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 , otag
);
3107 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3108 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3109 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3110 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3112 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3113 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 , otag
);
3114 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+12, otag
);
3115 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+20, otag
);
3116 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+28, otag
);
3118 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 32, otag
);
3123 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP
)
3125 PROF_EVENT(MCPE_NEW_MEM_STACK_32
);
3126 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3127 /* Straightforward */
3128 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3129 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3130 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3131 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3132 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3133 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3135 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3136 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+4 );
3137 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+12 );
3138 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+20 );
3139 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+28 );
3141 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 32 );
3146 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP
)
3148 PROF_EVENT(MCPE_DIE_MEM_STACK_32
);
3149 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3150 /* Straightforward */
3151 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3152 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-24 );
3153 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3154 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
- 8 );
3155 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3156 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
3158 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3159 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-28 );
3160 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-20 );
3161 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-12 );
3162 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-4 );
3164 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-32, 32 );
3168 /*--------------- adjustment by 112 bytes ---------------*/
3171 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP
, UInt ecu
)
3173 UInt otag
= ecu
| MC_OKIND_STACK
;
3174 PROF_EVENT(MCPE_NEW_MEM_STACK_112
);
3175 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3176 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3177 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 , otag
);
3178 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3179 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3180 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+32, otag
);
3181 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+40, otag
);
3182 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+48, otag
);
3183 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+56, otag
);
3184 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+64, otag
);
3185 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+72, otag
);
3186 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+80, otag
);
3187 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+88, otag
);
3188 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+96, otag
);
3189 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+104, otag
);
3191 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 112, otag
);
3196 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP
)
3198 PROF_EVENT(MCPE_NEW_MEM_STACK_112
);
3199 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3200 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3201 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3202 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3203 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3204 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+32 );
3205 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+40 );
3206 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+48 );
3207 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+56 );
3208 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+64 );
3209 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+72 );
3210 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+80 );
3211 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+88 );
3212 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+96 );
3213 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+104 );
3215 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 112 );
3220 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP
)
3222 PROF_EVENT(MCPE_DIE_MEM_STACK_112
);
3223 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3224 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-112);
3225 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-104);
3226 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-96 );
3227 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-88 );
3228 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-80 );
3229 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-72 );
3230 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-64 );
3231 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-56 );
3232 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-48 );
3233 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-40 );
3234 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3235 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-24 );
3236 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3237 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
- 8 );
3239 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-112, 112 );
3243 /*--------------- adjustment by 128 bytes ---------------*/
3246 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP
, UInt ecu
)
3248 UInt otag
= ecu
| MC_OKIND_STACK
;
3249 PROF_EVENT(MCPE_NEW_MEM_STACK_128
);
3250 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3251 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3252 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 , otag
);
3253 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3254 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3255 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+32, otag
);
3256 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+40, otag
);
3257 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+48, otag
);
3258 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+56, otag
);
3259 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+64, otag
);
3260 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+72, otag
);
3261 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+80, otag
);
3262 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+88, otag
);
3263 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+96, otag
);
3264 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+104, otag
);
3265 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+112, otag
);
3266 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+120, otag
);
3268 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 128, otag
);
3273 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP
)
3275 PROF_EVENT(MCPE_NEW_MEM_STACK_128
);
3276 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3277 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3278 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3279 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3280 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3281 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+32 );
3282 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+40 );
3283 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+48 );
3284 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+56 );
3285 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+64 );
3286 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+72 );
3287 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+80 );
3288 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+88 );
3289 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+96 );
3290 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+104 );
3291 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+112 );
3292 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+120 );
3294 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 128 );
3299 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP
)
3301 PROF_EVENT(MCPE_DIE_MEM_STACK_128
);
3302 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3303 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-128);
3304 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-120);
3305 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-112);
3306 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-104);
3307 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-96 );
3308 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-88 );
3309 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-80 );
3310 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-72 );
3311 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-64 );
3312 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-56 );
3313 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-48 );
3314 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-40 );
3315 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3316 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-24 );
3317 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3318 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
- 8 );
3320 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-128, 128 );
3324 /*--------------- adjustment by 144 bytes ---------------*/
3327 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP
, UInt ecu
)
3329 UInt otag
= ecu
| MC_OKIND_STACK
;
3330 PROF_EVENT(MCPE_NEW_MEM_STACK_144
);
3331 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3332 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3333 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8, otag
);
3334 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3335 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3336 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+32, otag
);
3337 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+40, otag
);
3338 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+48, otag
);
3339 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+56, otag
);
3340 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+64, otag
);
3341 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+72, otag
);
3342 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+80, otag
);
3343 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+88, otag
);
3344 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+96, otag
);
3345 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+104, otag
);
3346 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+112, otag
);
3347 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+120, otag
);
3348 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+128, otag
);
3349 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+136, otag
);
3351 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 144, otag
);
3356 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP
)
3358 PROF_EVENT(MCPE_NEW_MEM_STACK_144
);
3359 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3360 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3361 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3362 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3363 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3364 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+32 );
3365 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+40 );
3366 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+48 );
3367 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+56 );
3368 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+64 );
3369 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+72 );
3370 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+80 );
3371 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+88 );
3372 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+96 );
3373 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+104 );
3374 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+112 );
3375 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+120 );
3376 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+128 );
3377 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+136 );
3379 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 144 );
3384 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP
)
3386 PROF_EVENT(MCPE_DIE_MEM_STACK_144
);
3387 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3388 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-144);
3389 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-136);
3390 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-128);
3391 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-120);
3392 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-112);
3393 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-104);
3394 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-96 );
3395 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-88 );
3396 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-80 );
3397 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-72 );
3398 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-64 );
3399 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-56 );
3400 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-48 );
3401 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-40 );
3402 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-32 );
3403 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-24 );
3404 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
-16 );
3405 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB
+ new_SP
- 8 );
3407 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ new_SP
-144, 144 );
3411 /*--------------- adjustment by 160 bytes ---------------*/
3414 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP
, UInt ecu
)
3416 UInt otag
= ecu
| MC_OKIND_STACK
;
3417 PROF_EVENT(MCPE_NEW_MEM_STACK_160
);
3418 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3419 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
, otag
);
3420 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+8, otag
);
3421 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+16, otag
);
3422 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+24, otag
);
3423 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+32, otag
);
3424 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+40, otag
);
3425 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+48, otag
);
3426 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+56, otag
);
3427 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+64, otag
);
3428 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+72, otag
);
3429 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+80, otag
);
3430 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+88, otag
);
3431 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+96, otag
);
3432 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+104, otag
);
3433 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+112, otag
);
3434 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+120, otag
);
3435 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+128, otag
);
3436 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+136, otag
);
3437 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+144, otag
);
3438 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB
+ new_SP
+152, otag
);
3440 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ new_SP
, 160, otag
);
3445 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP
)
3447 PROF_EVENT(MCPE_NEW_MEM_STACK_160
);
3448 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB
+ new_SP
)) {
3449 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
);
3450 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+8 );
3451 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+16 );
3452 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+24 );
3453 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+32 );
3454 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+40 );
3455 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+48 );
3456 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+56 );
3457 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+64 );
3458 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+72 );
3459 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+80 );
3460 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+88 );
3461 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+96 );
3462 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+104 );
3463 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+112 );
3464 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+120 );
3465 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+128 );
3466 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+136 );
3467 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+144 );
3468 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
+152 );
3470 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ new_SP
, 160 );
/* Retire 160 bytes of stack: mark [new_SP-RZ-160, new_SP-RZ) as
   noaccess after SP moves up by 160.  Unrolled for speed; falls back
   to the generic byte-range routine when the adjusted SP is not
   8-aligned. */
static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
{
   PROF_EVENT(MCPE_DIE_MEM_STACK_160);
   if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
      /* Fast path: twenty aligned 64-bit stores of the noaccess
         encoding, covering the 160 bytes just vacated (below the
         redzone-adjusted SP). */
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -160);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -152);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -144);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -136);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -128);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -120);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -112);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -104);
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -96 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -88 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -80 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -72 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -64 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -56 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -48 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -40 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -32 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -24 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP -16 );
      make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP - 8 );
   } else {
      /* Slow path: generic range version. */
      MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP -160, 160 );
   }
}
3504 /*--------------- adjustment by N bytes ---------------*/
3506 static void mc_new_mem_stack_w_ECU ( Addr a
, SizeT len
, UInt ecu
)
3508 UInt otag
= ecu
| MC_OKIND_STACK
;
3509 PROF_EVENT(MCPE_NEW_MEM_STACK
);
3510 MC_(make_mem_undefined_w_otag
) ( -VG_STACK_REDZONE_SZB
+ a
, len
, otag
);
3513 static void mc_new_mem_stack ( Addr a
, SizeT len
)
3515 PROF_EVENT(MCPE_NEW_MEM_STACK
);
3516 make_mem_undefined ( -VG_STACK_REDZONE_SZB
+ a
, len
);
3519 static void mc_die_mem_stack ( Addr a
, SizeT len
)
3521 PROF_EVENT(MCPE_DIE_MEM_STACK
);
3522 MC_(make_mem_noaccess
) ( -VG_STACK_REDZONE_SZB
+ a
, len
);
3526 /* The AMD64 ABI says:
3528 "The 128-byte area beyond the location pointed to by %rsp is considered
3529 to be reserved and shall not be modified by signal or interrupt
3530 handlers. Therefore, functions may use this area for temporary data
3531 that is not needed across function calls. In particular, leaf functions
3532 may use this area for their entire stack frame, rather than adjusting
3533 the stack pointer in the prologue and epilogue. This area is known as
3536 So after any call or return we need to mark this redzone as containing
3539 Consider this: we're in function f. f calls g. g moves rsp down
3540 modestly (say 16 bytes) and writes stuff all over the red zone, making it
3541 defined. g returns. f is buggy and reads from parts of the red zone
3542 that it didn't write on. But because g filled that area in, f is going
3543 to be picking up defined V bits and so any errors from reading bits of
3544 the red zone it didn't write, will be missed. The only solution I could
3545 think of was to make the red zone undefined when g returns to f.
3547 This is in accordance with the ABI, which makes it clear the redzone
3548 is volatile across function calls.
3550 The problem occurs the other way round too: f could fill the RZ up
3551 with defined values and g could mistakenly read them. So the RZ
3552 also needs to be nuked on function calls.
/* Here's a simple cache to hold nia -> ECU mappings.  It could be
   improved so as to have a lower miss rate. */

static UWord stats__nia_cache_queries = 0;  /* total lookups */
static UWord stats__nia_cache_misses  = 0;  /* both ways missed */

/* One 2-entry (2-way) cache line: two nia->ECU pairs, way 0 being the
   most recently used. */
typedef
   struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
            UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   WCacheEnt;

#define N_NIA_TO_ECU_CACHE 511

static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
/* Prime every slot of the nia->ECU cache with a valid entry for
   address zero, so lookups never see uninitialised cache state. */
static void init_nia_to_ecu_cache ( void )
{
   UWord       i;
   Addr        zero_addr = 0;
   ExeContext* zero_ec;
   UInt        zero_ecu;
   /* Fill all the slots with an entry for address zero, and the
      relevant otags accordingly.  Hence the cache is initially filled
      with valid data. */
   zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   tl_assert(zero_ec);
   zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
      nia_to_ecu_cache[i].nia0 = zero_addr;
      nia_to_ecu_cache[i].ecu0 = zero_ecu;
      nia_to_ecu_cache[i].nia1 = zero_addr;
      nia_to_ecu_cache[i].ecu1 = zero_ecu;
   }
}
/* Map a next-instruction address to an ECU (ExeContext unique id),
   going through the 2-way nia_to_ecu_cache.  Way 0 is the MRU entry;
   a hit in way 1 swaps the ways.  On a miss, a depth-1 ExeContext is
   created for nia and installed in way 0, demoting the old way 0. */
static inline UInt convert_nia_to_ecu ( Addr nia )
{
   UWord i;
   UInt        ecu;
   ExeContext* ec;

   /* nia must be the same width as the cached keys. */
   tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );

   stats__nia_cache_queries++;
   i = nia % N_NIA_TO_ECU_CACHE;
   tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);

   /* Way 0 hit: most common case. */
   if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
      return nia_to_ecu_cache[i].ecu0;

   /* Way 1 hit: promote it to way 0 (LRU within the set). */
   if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
#     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
      SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
      SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
      return nia_to_ecu_cache[i].ecu0;
   }

   /* Miss: build a new ExeContext for nia and install it. */
   stats__nia_cache_misses++;
   ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   tl_assert(ec);
   ecu = VG_(get_ECU_from_ExeContext)(ec);
   tl_assert(VG_(is_plausible_ECU)(ecu));

   nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;

   nia_to_ecu_cache[i].nia0 = nia;
   nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   return ecu;
}
/* This marks the stack as addressible but undefined, after a call or
   return for a target that has an ABI defined stack redzone.  It
   happens quite a lot and needs to be fast.  This is the version for
   origin tracking.  The non-origin-tracking version is below. */
VG_REGPARM(3)
void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
                  base, len, nia );

   /* Derive the origin tag for this stack frame from the next
      instruction address. */
   UInt ecu = convert_nia_to_ecu ( nia );
   tl_assert(VG_(is_plausible_ECU)(ecu));

   UInt otag = ecu | MC_OKIND_STACK;

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
      make_aligned_word64_undefined_w_otag(base +   0, otag);
      make_aligned_word64_undefined_w_otag(base +   8, otag);
      make_aligned_word64_undefined_w_otag(base +  16, otag);
      make_aligned_word64_undefined_w_otag(base +  24, otag);

      make_aligned_word64_undefined_w_otag(base +  32, otag);
      make_aligned_word64_undefined_w_otag(base +  40, otag);
      make_aligned_word64_undefined_w_otag(base +  48, otag);
      make_aligned_word64_undefined_w_otag(base +  56, otag);

      make_aligned_word64_undefined_w_otag(base +  64, otag);
      make_aligned_word64_undefined_w_otag(base +  72, otag);
      make_aligned_word64_undefined_w_otag(base +  80, otag);
      make_aligned_word64_undefined_w_otag(base +  88, otag);

      make_aligned_word64_undefined_w_otag(base +  96, otag);
      make_aligned_word64_undefined_w_otag(base + 104, otag);
      make_aligned_word64_undefined_w_otag(base + 112, otag);
      make_aligned_word64_undefined_w_otag(base + 120, otag);
   } else {
      MC_(make_mem_undefined_w_otag)(base, len, otag);
   }
#  endif

   /* Idea is: go fast when
         * 8-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely with a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)
   */
   /* 128 bytes is the amd64-ELF redzone size. */
   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
           /* Now we know that the entire address range falls within a
              single secondary map, and that that secondary 'lives' in
              the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            /* Each UShort of vabits covers 8 bytes of address space;
               16 stores cover all 128 bytes. */
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            /* And set the 16 corresponding origin words. */
            set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
            return;
         }
      }
   }

   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 288 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
           /* Now we know that the entire address range falls within a
              single secondary map, and that that secondary 'lives' in
              the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            /* 36 UShort stores cover all 288 bytes. */
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            p[16] = VA_BITS16_UNDEFINED;
            p[17] = VA_BITS16_UNDEFINED;
            p[18] = VA_BITS16_UNDEFINED;
            p[19] = VA_BITS16_UNDEFINED;
            p[20] = VA_BITS16_UNDEFINED;
            p[21] = VA_BITS16_UNDEFINED;
            p[22] = VA_BITS16_UNDEFINED;
            p[23] = VA_BITS16_UNDEFINED;
            p[24] = VA_BITS16_UNDEFINED;
            p[25] = VA_BITS16_UNDEFINED;
            p[26] = VA_BITS16_UNDEFINED;
            p[27] = VA_BITS16_UNDEFINED;
            p[28] = VA_BITS16_UNDEFINED;
            p[29] = VA_BITS16_UNDEFINED;
            p[30] = VA_BITS16_UNDEFINED;
            p[31] = VA_BITS16_UNDEFINED;
            p[32] = VA_BITS16_UNDEFINED;
            p[33] = VA_BITS16_UNDEFINED;
            p[34] = VA_BITS16_UNDEFINED;
            p[35] = VA_BITS16_UNDEFINED;
            /* And the matching 36 origin words. */
            set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
            set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
            return;
         }
      }
   }

   /* else fall into slow case */
   MC_(make_mem_undefined_w_otag)(base, len, otag);
}
/* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
   specialised for the non-origin-tracking case. */
VG_REGPARM(2)
void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
                  base, len );

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
      make_aligned_word64_undefined(base +   0);
      make_aligned_word64_undefined(base +   8);
      make_aligned_word64_undefined(base +  16);
      make_aligned_word64_undefined(base +  24);

      make_aligned_word64_undefined(base +  32);
      make_aligned_word64_undefined(base +  40);
      make_aligned_word64_undefined(base +  48);
      make_aligned_word64_undefined(base +  56);

      make_aligned_word64_undefined(base +  64);
      make_aligned_word64_undefined(base +  72);
      make_aligned_word64_undefined(base +  80);
      make_aligned_word64_undefined(base +  88);

      make_aligned_word64_undefined(base +  96);
      make_aligned_word64_undefined(base + 104);
      make_aligned_word64_undefined(base + 112);
      make_aligned_word64_undefined(base + 120);
   } else {
      make_mem_undefined(base, len);
   }
#  endif

   /* Idea is: go fast when
         * 8-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely with a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)
   */
   /* 128 bytes is the amd64-ELF redzone size. */
   if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
           /* Now we know that the entire address range falls within a
              single secondary map, and that that secondary 'lives' in
              the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            /* 16 UShort stores cover all 128 bytes; no origin words to
               set in this non-origin-tracking variant. */
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            return;
         }
      }
   }

   /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 288 - 1);
      tl_assert(a_lo < a_hi);             // paranoia: detect overflow
      if (a_hi <= MAX_PRIMARY_ADDRESS) {
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
           /* Now we know that the entire address range falls within a
              single secondary map, and that that secondary 'lives' in
              the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* p       = &sm->vabits16[v_off16];
            /* 36 UShort stores cover all 288 bytes. */
            p[ 0] = VA_BITS16_UNDEFINED;
            p[ 1] = VA_BITS16_UNDEFINED;
            p[ 2] = VA_BITS16_UNDEFINED;
            p[ 3] = VA_BITS16_UNDEFINED;
            p[ 4] = VA_BITS16_UNDEFINED;
            p[ 5] = VA_BITS16_UNDEFINED;
            p[ 6] = VA_BITS16_UNDEFINED;
            p[ 7] = VA_BITS16_UNDEFINED;
            p[ 8] = VA_BITS16_UNDEFINED;
            p[ 9] = VA_BITS16_UNDEFINED;
            p[10] = VA_BITS16_UNDEFINED;
            p[11] = VA_BITS16_UNDEFINED;
            p[12] = VA_BITS16_UNDEFINED;
            p[13] = VA_BITS16_UNDEFINED;
            p[14] = VA_BITS16_UNDEFINED;
            p[15] = VA_BITS16_UNDEFINED;
            p[16] = VA_BITS16_UNDEFINED;
            p[17] = VA_BITS16_UNDEFINED;
            p[18] = VA_BITS16_UNDEFINED;
            p[19] = VA_BITS16_UNDEFINED;
            p[20] = VA_BITS16_UNDEFINED;
            p[21] = VA_BITS16_UNDEFINED;
            p[22] = VA_BITS16_UNDEFINED;
            p[23] = VA_BITS16_UNDEFINED;
            p[24] = VA_BITS16_UNDEFINED;
            p[25] = VA_BITS16_UNDEFINED;
            p[26] = VA_BITS16_UNDEFINED;
            p[27] = VA_BITS16_UNDEFINED;
            p[28] = VA_BITS16_UNDEFINED;
            p[29] = VA_BITS16_UNDEFINED;
            p[30] = VA_BITS16_UNDEFINED;
            p[31] = VA_BITS16_UNDEFINED;
            p[32] = VA_BITS16_UNDEFINED;
            p[33] = VA_BITS16_UNDEFINED;
            p[34] = VA_BITS16_UNDEFINED;
            p[35] = VA_BITS16_UNDEFINED;
            return;
         }
      }
   }

   /* else fall into slow case */
   make_mem_undefined(base, len);
}
/* And this is an even more specialised case, for the case where there
   is no origin tracking, and the length is 128. */
VG_REGPARM(1)
void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
{
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
   if (0)
      VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );

#  if 0
   /* Slow(ish) version, which is fairly easily seen to be correct.
   */
   if (LIKELY( VG_IS_8_ALIGNED(base) )) {
      make_aligned_word64_undefined(base +   0);
      make_aligned_word64_undefined(base +   8);
      make_aligned_word64_undefined(base +  16);
      make_aligned_word64_undefined(base +  24);

      make_aligned_word64_undefined(base +  32);
      make_aligned_word64_undefined(base +  40);
      make_aligned_word64_undefined(base +  48);
      make_aligned_word64_undefined(base +  56);

      make_aligned_word64_undefined(base +  64);
      make_aligned_word64_undefined(base +  72);
      make_aligned_word64_undefined(base +  80);
      make_aligned_word64_undefined(base +  88);

      make_aligned_word64_undefined(base +  96);
      make_aligned_word64_undefined(base + 104);
      make_aligned_word64_undefined(base + 112);
      make_aligned_word64_undefined(base + 120);
   } else {
      make_mem_undefined(base, 128);
   }
#  endif

   /* Idea is: go fast when
         * 16-aligned and length is 128
         * the sm is available in the main primary map
         * the address range falls entirely with a single secondary map
      If all those conditions hold, just update the V+A bits by writing
      directly into the vabits array.  (If the sm was distinguished, this
      will make a copy and then write to it.)

      Typically this applies to amd64 'ret' instructions, since RSP is
      16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
   */
   if (LIKELY( VG_IS_16_ALIGNED(base) )) {
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      /* FIXME: come up with a sane story on the wraparound case
         (which of course cannot happen, but still..) */
      /* tl_assert(a_lo < a_hi); */           // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
           /* Now we know that the entire address range falls within a
              single secondary map, and that that secondary 'lives' in
              the main primary map. */
            PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
            SecMap* sm    = get_secmap_for_writing_low(a_lo);
            UWord   v_off = SM_OFF(a_lo);
            /* 16-alignment of base makes the vabits pointer 4-aligned,
               so the range can be filled with eight 32-bit stores. */
            UInt*   w32   = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]);
            w32[ 0] = VA_BITS32_UNDEFINED;
            w32[ 1] = VA_BITS32_UNDEFINED;
            w32[ 2] = VA_BITS32_UNDEFINED;
            w32[ 3] = VA_BITS32_UNDEFINED;
            w32[ 4] = VA_BITS32_UNDEFINED;
            w32[ 5] = VA_BITS32_UNDEFINED;
            w32[ 6] = VA_BITS32_UNDEFINED;
            w32[ 7] = VA_BITS32_UNDEFINED;
            return;
         }
      }
   }

   /* The same, but for when base is 8 % 16, which is the situation
      with RSP for amd64-ELF immediately after call instructions.
   */
   if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
      /* Now we know the address range is suitably sized and aligned. */
      UWord a_lo = (UWord)(base);
      UWord a_hi = (UWord)(base + 128 - 1);
      /* FIXME: come up with a sane story on the wraparound case
         (which of course cannot happen, but still..) */
      /* tl_assert(a_lo < a_hi); */           // paranoia: detect overflow
      if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
         /* Now we know the entire range is within the main primary map. */
         UWord pm_off_lo = get_primary_map_low_offset(a_lo);
         UWord pm_off_hi = get_primary_map_low_offset(a_hi);
         if (LIKELY(pm_off_lo == pm_off_hi)) {
            PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
           /* Now we know that the entire address range falls within a
              single secondary map, and that that secondary 'lives' in
              the main primary map. */
            SecMap* sm      = get_secmap_for_writing_low(a_lo);
            UWord   v_off16 = SM_OFF_16(a_lo);
            UShort* w16     = &sm->vabits16[v_off16];
            /* Only 8-aligned here, so write a leading and trailing
               16-bit piece with seven 32-bit stores in between. */
            UInt*   w32     = ASSUME_ALIGNED(UInt*, &w16[1]);
            /* The following assertion is commented out for obvious
               performance reasons, but was verified as valid when
               running the entire testsuite and also Firefox. */
            /* tl_assert(VG_IS_4_ALIGNED(w32)); */
            w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
            w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
            w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
            w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
            w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
            w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
            w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
            w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
            w16[15] = VA_BITS16_UNDEFINED; // w16[15]
            return;
         }
      }
   }

   /* else fall into slow case */
   PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
   make_mem_undefined(base, 128);
}
4106 /*------------------------------------------------------------*/
4107 /*--- Checking memory ---*/
4108 /*------------------------------------------------------------*/
/* Check permissions for address range.  If inadequate permissions
   exist, *bad_addr is set to the offending address, so the caller can
   know what it is. */

/* Returns True if [a .. a+len) is not addressible.  Otherwise,
   returns False, and if bad_addr is non-NULL, sets *bad_addr to
   indicate the lowest failing address.  Functions below are
   similar. */
Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_NOACCESS != vabits2) {
         /* Found an accessible byte: report it and stop. */
         if (bad_addr != NULL) *bad_addr = a;
         return False;
      }
      a++;
   }
   return True;
}
/* Returns True if every byte in [a .. a+len) is addressable (any V
   state).  On failure returns False and, if bad_addr is non-NULL,
   sets *bad_addr to the lowest inaccessible address. */
static Bool is_mem_addressable ( Addr a, SizeT len,
                                 /*OUT*/Addr* bad_addr )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_NOACCESS == vabits2) {
         /* First inaccessible byte: report it and stop. */
         if (bad_addr != NULL) *bad_addr = a;
         return False;
      }
      a++;
   }
   return True;
}
/* Check that [a .. a+len) is both addressable and fully defined.
   Returns MC_Ok on success; otherwise MC_AddrErr or MC_ValueErr, with
   *bad_addr set to the first failing address and, for value errors at
   --track-origins=yes (mc_level 3), *otag set to the origin tag of
   the offending byte.  Stops at the first error found. */
static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
                                      /*OUT*/Addr* bad_addr,
                                      /*OUT*/UInt* otag )
{
   SizeT i;
   UWord vabits2;

   PROF_EVENT(MCPE_IS_MEM_DEFINED);
   DEBUG("is_mem_defined\n");

   if (otag)     *otag = 0;
   if (bad_addr) *bad_addr = 0;
   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_DEFINED != vabits2) {
         // Error! Nb: Report addressability errors in preference to
         // definedness errors.  And don't report definedness errors unless
         // --undef-value-errors=yes.
         if (bad_addr) {
            *bad_addr = a;
         }
         if (VA_BITS2_NOACCESS == vabits2) {
            return MC_AddrErr;
         }
         if (MC_(clo_mc_level) >= 2) {
            if (otag && MC_(clo_mc_level) == 3) {
               *otag = MC_(helperc_b_load1)( a );
            }
            return MC_ValueErr;
         }
      }
      a++;
   }
   return MC_Ok;
}
/* Like is_mem_defined but doesn't give up at the first uninitialised
   byte -- the entire range is always checked.  This is important for
   detecting errors in the case where a checked range strays into
   invalid memory, but that fact is not detected by the ordinary
   is_mem_defined(), because of an undefined section that precedes the
   out of range section, possibly as a result of an alignment hole in
   the checked data.  This version always checks the entire range and
   can report both a definedness and an accessbility error, if
   necessary. */
static void is_mem_defined_comprehensive (
               Addr a, SizeT len,
               /*OUT*/Bool* errorV,    /* is there a definedness err? */
               /*OUT*/Addr* bad_addrV, /* if so where? */
               /*OUT*/UInt* otagV,     /* and what's its otag? */
               /*OUT*/Bool* errorA,    /* is there an addressability err? */
               /*OUT*/Addr* bad_addrA  /* if so where? */
            )
{
   SizeT i;
   UWord vabits2;
   Bool  already_saw_errV = False;  /* only the first V error is recorded */

   PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
   DEBUG("is_mem_defined_comprehensive\n");

   /* Caller must pass in cleared error flags. */
   tl_assert(!(*errorV || *errorA));

   for (i = 0; i < len; i++) {
      PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
      vabits2 = get_vabits2(a);
      switch (vabits2) {
         case VA_BITS2_DEFINED:
            a++;
            break;
         case VA_BITS2_UNDEFINED:
         case VA_BITS2_PARTDEFINED:
            if (!already_saw_errV) {
               *errorV    = True;
               *bad_addrV = a;
               if (MC_(clo_mc_level) == 3) {
                  *otagV = MC_(helperc_b_load1)( a );
               } else {
                  *otagV = 0;
               }
               already_saw_errV = True;
            }
            a++; /* keep going */
            break;
         case VA_BITS2_NOACCESS:
            *errorA    = True;
            *bad_addrA = a;
            return; /* give up now. */
         default:
            tl_assert(0);
      }
   }
}
/* Check a zero-terminated ascii string.  Tricky -- don't want to
   examine the actual bytes, to find the end, until we're sure it is
   safe to do so. */
/* NOTE(review): declared Bool but returns MC_ReadResult values
   (MC_Ok/MC_AddrErr/MC_ValueErr); callers assign the result to an
   MC_ReadResult.  Works because the enum values fit in Bool's
   underlying type, but the return type looks like it should be
   MC_ReadResult -- confirm against mc_include.h before changing. */
static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
{
   UWord vabits2;

   PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
   DEBUG("mc_is_defined_asciiz\n");

   if (otag)     *otag = 0;
   if (bad_addr) *bad_addr = 0;
   while (True) {
      PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
      vabits2 = get_vabits2(a);
      if (VA_BITS2_DEFINED != vabits2) {
         // Error! Nb: Report addressability errors in preference to
         // definedness errors.  And don't report definedness errors unless
         // --undef-value-errors=yes.
         if (bad_addr) {
            *bad_addr = a;
         }
         if (VA_BITS2_NOACCESS == vabits2) {
            return MC_AddrErr;
         }
         if (MC_(clo_mc_level) >= 2) {
            if (otag && MC_(clo_mc_level) == 3) {
               *otag = MC_(helperc_b_load1)( a );
            }
            return MC_ValueErr;
         }
      }
      /* Ok, a is safe to read. */
      if (* ((UChar*)a) == 0) {
         return MC_Ok;
      }
      a++;
   }
}
4303 /*------------------------------------------------------------*/
4304 /*--- Memory event handlers ---*/
4305 /*------------------------------------------------------------*/
/* Core callback: verify [base, base+size) is addressable; on failure
   record an appropriate error against tid, described by s. */
static
void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
                                Addr base, SizeT size )
{
   Addr bad_addr;
   Bool ok = is_mem_addressable ( base, size, &bad_addr );

   if (!ok) {
      switch (part) {
      case Vg_CoreSysCall:
         MC_(record_memparam_error) ( tid, bad_addr,
                                      /*isAddrErr*/True, s, 0/*otag*/ );
         break;

      case Vg_CoreSignal:
         MC_(record_core_mem_error)( tid, s );
         break;

      default:
         VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
      }
   }
}
/* Core callback: verify [base, base+size) is addressable and defined;
   on failure record the appropriate kind of error, preferring the
   address error indication when both apply. */
static
void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
                            Addr base, SizeT size )
{
   UInt otag = 0;
   Addr bad_addr = 0; /* shut GCC up */
   MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );

   if (MC_Ok != res) {
      Bool isAddrErr = ( MC_AddrErr == res ? True : False );

      switch (part) {
      case Vg_CoreSysCall:
         /* Suppress the otag for address errors: origins only make
            sense for undefined-value errors. */
         MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
                                      isAddrErr ? 0 : otag );
         break;

      case Vg_CoreSysCallArgInMem:
         MC_(record_regparam_error) ( tid, s, otag );
         break;

      /* If we're being asked to jump to a silly address, record an error
         message before potentially crashing the entire system. */
      case Vg_CoreTranslate:
         MC_(record_jump_error)( tid, bad_addr );
         break;

      default:
         VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
      }
   }
}
/* Core callback: verify the NUL-terminated string at str is
   addressable and defined up to and including its terminator.  Only
   used for syscall string arguments. */
static
void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
                                   const HChar* s, Addr str )
{
   MC_ReadResult res;
   Addr bad_addr = 0;   // shut GCC up
   UInt otag = 0;

   tl_assert(part == Vg_CoreSysCall);
   res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   if (MC_Ok != res) {
      Bool isAddrErr = ( MC_AddrErr == res ? True : False );
      /* otag is only meaningful for value errors. */
      MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
                                   isAddrErr ? 0 : otag );
   }
}
4381 /* Handling of mmap and mprotect is not as simple as it seems.
4383 The underlying semantics are that memory obtained from mmap is
4384 always initialised, but may be inaccessible. And changes to the
4385 protection of memory do not change its contents and hence not its
4386 definedness state. Problem is we can't model
4387 inaccessible-but-with-some-definedness state; once we mark memory
4388 as inaccessible we lose all info about definedness, and so can't
4389 restore that if it is later made accessible again.
4391 One obvious thing to do is this:
4393 mmap/mprotect NONE -> noaccess
4394 mmap/mprotect other -> defined
4396 The problem case here is: taking accessible memory, writing
4397 uninitialised data to it, mprotecting it NONE and later mprotecting
4398 it back to some accessible state causes the undefinedness to be
4401 A better proposal is:
4403 (1) mmap NONE -> make noaccess
4404 (2) mmap other -> make defined
4406 (3) mprotect NONE -> # no change
4407 (4) mprotect other -> change any "noaccess" to "defined"
4409 (2) is OK because memory newly obtained from mmap really is defined
4410 (zeroed out by the kernel -- doing anything else would
4411 constitute a massive security hole.)
4413 (1) is OK because the only way to make the memory usable is via
4414 (4), in which case we also wind up correctly marking it all as
4417 (3) is the weak case. We choose not to change memory state.
4418 (presumably the range is in some mixture of "defined" and
4419 "undefined", viz, accessible but with arbitrary V bits). Doing
4420 nothing means we retain the V bits, so that if the memory is
4421 later mprotected "other", the V bits remain unchanged, so there
4422 can be no false negatives. The bad effect is that if there's
4423 an access in the area, then MC cannot warn; but at least we'll
4424 get a SEGV to show, so it's better than nothing.
4426 Consider the sequence (3) followed by (4). Any memory that was
4427 "defined" or "undefined" previously retains its state (as
4428 required). Any memory that was "noaccess" before can only have
4429 been made that way by (1), and so it's OK to change it to
4432 See https://bugs.kde.org/show_bug.cgi?id=205541
4433 and https://bugs.kde.org/show_bug.cgi?id=210268
4436 void mc_new_mem_mmap ( Addr a
, SizeT len
, Bool rr
, Bool ww
, Bool xx
,
4439 if (rr
|| ww
|| xx
) {
4440 /* (2) mmap/mprotect other -> defined */
4441 MC_(make_mem_defined
)(a
, len
);
4443 /* (1) mmap/mprotect NONE -> noaccess */
4444 MC_(make_mem_noaccess
)(a
, len
);
4449 void mc_new_mem_mprotect ( Addr a
, SizeT len
, Bool rr
, Bool ww
, Bool xx
)
4451 if (rr
|| ww
|| xx
) {
4452 /* (4) mprotect other -> change any "noaccess" to "defined" */
4453 make_mem_defined_if_noaccess(a
, len
);
4455 /* (3) mprotect NONE -> # no change */
4462 void mc_new_mem_startup( Addr a
, SizeT len
,
4463 Bool rr
, Bool ww
, Bool xx
, ULong di_handle
)
4465 // Because code is defined, initialised variables get put in the data
4466 // segment and are defined, and uninitialised variables get put in the
4467 // bss segment and are auto-zeroed (and so defined).
4469 // It's possible that there will be padding between global variables.
4470 // This will also be auto-zeroed, and marked as defined by Memcheck. If
4471 // a program uses it, Memcheck will not complain. This is arguably a
4472 // false negative, but it's a grey area -- the behaviour is defined (the
4473 // padding is zeroed) but it's probably not what the user intended. And
4474 // we can't avoid it.
4476 // Note: we generally ignore RWX permissions, because we can't track them
4477 // without requiring more than one A bit which would slow things down a
4478 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
4479 // So we mark any such pages as "unaddressable".
4480 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4481 a
, (ULong
)len
, rr
, ww
, xx
);
4482 mc_new_mem_mmap(a
, len
, rr
, ww
, xx
, di_handle
);
4486 void mc_post_mem_write(CorePart part
, ThreadId tid
, Addr a
, SizeT len
)
4488 MC_(make_mem_defined
)(a
, len
);
4492 /*------------------------------------------------------------*/
4493 /*--- Register event handlers ---*/
4494 /*------------------------------------------------------------*/
4496 /* Try and get a nonzero origin for the guest state section of thread
4497 tid characterised by (offset,size). Return 0 if nothing to show
4499 static UInt
mb_get_origin_for_guest_offset ( ThreadId tid
,
4500 Int offset
, SizeT size
)
4505 sh2off
= MC_(get_otrack_shadow_offset
)( offset
, size
);
4507 return 0; /* This piece of guest state is not tracked */
4508 tl_assert(sh2off
>= 0);
4509 tl_assert(0 == (sh2off
% 4));
4510 area
[0] = 0x31313131;
4511 area
[2] = 0x27272727;
4512 VG_(get_shadow_regs_area
)( tid
, (UChar
*)&area
[1], 2/*shadowno*/,sh2off
,4 );
4513 tl_assert(area
[0] == 0x31313131);
4514 tl_assert(area
[2] == 0x27272727);
4520 /* When some chunk of guest state is written, mark the corresponding
4521 shadow area as valid. This is used to initialise arbitrarily large
4522 chunks of guest state, hence the _SIZE value, which has to be as
4523 big as the biggest guest state.
4525 static void mc_post_reg_write ( CorePart part
, ThreadId tid
,
4526 PtrdiffT offset
, SizeT size
)
4528 # define MAX_REG_WRITE_SIZE 1744
4529 UChar area
[MAX_REG_WRITE_SIZE
];
4530 tl_assert(size
<= MAX_REG_WRITE_SIZE
);
4531 VG_(memset
)(area
, V_BITS8_DEFINED
, size
);
4532 VG_(set_shadow_regs_area
)( tid
, 1/*shadowNo*/,offset
,size
, area
);
4533 # undef MAX_REG_WRITE_SIZE
4537 void mc_post_reg_write_clientcall ( ThreadId tid
,
4538 PtrdiffT offset
, SizeT size
, Addr f
)
4540 mc_post_reg_write(/*dummy*/0, tid
, offset
, size
);
4543 /* Look at the definedness of the guest's shadow state for
4544 [offset, offset+len). If any part of that is undefined, record
4547 static void mc_pre_reg_read ( CorePart part
, ThreadId tid
, const HChar
* s
,
4548 PtrdiffT offset
, SizeT size
)
4555 tl_assert(size
<= 16);
4557 VG_(get_shadow_regs_area
)( tid
, area
, 1/*shadowNo*/,offset
,size
);
4560 for (i
= 0; i
< size
; i
++) {
4561 if (area
[i
] != V_BITS8_DEFINED
) {
4570 /* We've found some undefinedness. See if we can also find an
4572 otag
= mb_get_origin_for_guest_offset( tid
, offset
, size
);
4573 MC_(record_regparam_error
) ( tid
, s
, otag
);
4577 /*------------------------------------------------------------*/
4578 /*--- Register-memory event handlers ---*/
4579 /*------------------------------------------------------------*/
4581 static void mc_copy_mem_to_reg ( CorePart part
, ThreadId tid
, Addr a
,
4582 PtrdiffT guest_state_offset
, SizeT size
)
4590 for (i
= 0; i
< size
; i
++) {
4591 get_vbits8( a
+i
, &vbits8
);
4592 VG_(set_shadow_regs_area
)( tid
, 1/*shadowNo*/, guest_state_offset
+i
,
4596 if (MC_(clo_mc_level
) != 3)
4599 /* Track origins. */
4600 offset
= MC_(get_otrack_shadow_offset
)( guest_state_offset
, size
);
4606 d32
= MC_(helperc_b_load1
)( a
);
4609 d32
= MC_(helperc_b_load2
)( a
);
4612 d32
= MC_(helperc_b_load4
)( a
);
4615 d32
= MC_(helperc_b_load8
)( a
);
4618 d32
= MC_(helperc_b_load16
)( a
);
4621 d32
= MC_(helperc_b_load32
)( a
);
4627 VG_(set_shadow_regs_area
)( tid
, 2/*shadowNo*/, offset
, 4, (UChar
*)&d32
);
4630 static void mc_copy_reg_to_mem ( CorePart part
, ThreadId tid
,
4631 PtrdiffT guest_state_offset
, Addr a
,
4640 for (i
= 0; i
< size
; i
++) {
4641 VG_(get_shadow_regs_area
)( tid
, &vbits8
, 1/*shadowNo*/,
4642 guest_state_offset
+i
, 1 );
4643 set_vbits8( a
+i
, vbits8
);
4646 if (MC_(clo_mc_level
) != 3)
4649 /* Track origins. */
4650 offset
= MC_(get_otrack_shadow_offset
)( guest_state_offset
, size
);
4654 VG_(get_shadow_regs_area
)( tid
, (UChar
*)&d32
, 2/*shadowNo*/, offset
, 4 );
4657 MC_(helperc_b_store1
)( a
, d32
);
4660 MC_(helperc_b_store2
)( a
, d32
);
4663 MC_(helperc_b_store4
)( a
, d32
);
4666 MC_(helperc_b_store8
)( a
, d32
);
4669 MC_(helperc_b_store16
)( a
, d32
);
4672 MC_(helperc_b_store32
)( a
, d32
);
4680 /*------------------------------------------------------------*/
4681 /*--- Some static assertions ---*/
4682 /*------------------------------------------------------------*/
4684 /* The handwritten assembly helpers below have baked-in assumptions
4685 about various constant values. These assertions attempt to make
4686 that a bit safer by checking those values and flagging changes that
4687 would make the assembly invalid. Not perfect but it's better than
4690 STATIC_ASSERT(SM_CHUNKS
* 4 == 65536);
4692 STATIC_ASSERT(VA_BITS8_DEFINED
== 0xAA);
4693 STATIC_ASSERT(VA_BITS8_UNDEFINED
== 0x55);
4695 STATIC_ASSERT(V_BITS32_DEFINED
== 0x00000000);
4696 STATIC_ASSERT(V_BITS32_UNDEFINED
== 0xFFFFFFFF);
4698 STATIC_ASSERT(VA_BITS4_DEFINED
== 0xA);
4699 STATIC_ASSERT(VA_BITS4_UNDEFINED
== 0x5);
4701 STATIC_ASSERT(V_BITS16_DEFINED
== 0x0000);
4702 STATIC_ASSERT(V_BITS16_UNDEFINED
== 0xFFFF);
4704 STATIC_ASSERT(VA_BITS2_DEFINED
== 2);
4705 STATIC_ASSERT(VA_BITS2_UNDEFINED
== 1);
4707 STATIC_ASSERT(V_BITS8_DEFINED
== 0x00);
4708 STATIC_ASSERT(V_BITS8_UNDEFINED
== 0xFF);
4711 /*------------------------------------------------------------*/
4712 /*--- Functions called directly from generated code: ---*/
4713 /*--- Load/store handlers. ---*/
4714 /*------------------------------------------------------------*/
4716 /* Types: LOADV32, LOADV16, LOADV8 are:
4718 so they return 32-bits on 32-bit machines and 64-bits on
4719 64-bit machines. Addr has the same size as a host word.
4721 LOADV64 is always ULong fn ( Addr a )
4723 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4724 are a UWord, and for STOREV64 they are a ULong.
4727 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4728 naturally '_sz/8'-aligned, or it exceeds the range covered by the
4729 primary map. This is all very tricky (and important!), so let's
4730 work through the maths by hand (below), *and* assert for these
4731 values at startup. */
/* Nonzero iff any part of '_a' selected by the mask is 1: either '_a'
   is not naturally '_szInBytes'-aligned, or it exceeds the range
   covered by the primary map.  See the worked examples below. */
#define MASK(_szInBytes) \
   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )

/* MASK only exists so as to define this macro. */
#define UNALIGNED_OR_HIGH(_a,_szInBits) \
   ((_a) & MASK((_szInBits>>3)))
4739 /* On a 32-bit machine:
4741 N_PRIMARY_BITS == 16, so
4742 N_PRIMARY_MAP == 0x10000, so
4743 N_PRIMARY_MAP-1 == 0xFFFF, so
4744 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4746 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4747 = ~ ( 0xFFFF | 0xFFFF0000 )
4751 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4752 = ~ ( 0xFFFE | 0xFFFF0000 )
4756 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4757 = ~ ( 0xFFFC | 0xFFFF0000 )
4761 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4762 = ~ ( 0xFFF8 | 0xFFFF0000 )
4766 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4767 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for
4768 the 1-byte alignment case, it is always a zero value, since MASK(1)
4769 is zero. All as expected.
4771 On a 64-bit machine, it's more complex, since we're testing
4772 simultaneously for misalignment and for the address being at or
4775 N_PRIMARY_BITS == 20, so
4776 N_PRIMARY_MAP == 0x100000, so
4777 N_PRIMARY_MAP-1 == 0xFFFFF, so
4778 (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4780 MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4781 = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4783 = 0xFFFF'FFF0'0000'0000
4785 MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4786 = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4788 = 0xFFFF'FFF0'0000'0001
4790 MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4791 = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4793 = 0xFFFF'FFF0'0000'0003
4795 MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4796 = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4798 = 0xFFFF'FFF0'0000'0007
4801 /*------------------------------------------------------------*/
4802 /*--- LOADV256 and LOADV128 ---*/
4803 /*------------------------------------------------------------*/
4806 void mc_LOADV_128_or_256 ( /*OUT*/ULong
* res
,
4807 Addr a
, SizeT nBits
, Bool isBigEndian
)
4809 PROF_EVENT(MCPE_LOADV_128_OR_256
);
4811 #ifndef PERF_FAST_LOADV
4812 mc_LOADV_128_or_256_slow( res
, a
, nBits
, isBigEndian
);
4816 UWord sm_off16
, vabits16
, j
;
4817 UWord nBytes
= nBits
/ 8;
4818 UWord nULongs
= nBytes
/ 8;
4821 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,nBits
) )) {
4822 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1
);
4823 mc_LOADV_128_or_256_slow( res
, a
, nBits
, isBigEndian
);
4827 /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4828 suitably aligned, is mapped, and addressible. */
4829 for (j
= 0; j
< nULongs
; j
++) {
4830 sm
= get_secmap_for_reading_low(a
+ 8*j
);
4831 sm_off16
= SM_OFF_16(a
+ 8*j
);
4832 vabits16
= sm
->vabits16
[sm_off16
];
4834 // Convert V bits from compact memory form to expanded
4836 if (LIKELY(vabits16
== VA_BITS16_DEFINED
)) {
4837 res
[j
] = V_BITS64_DEFINED
;
4838 } else if (LIKELY(vabits16
== VA_BITS16_UNDEFINED
)) {
4839 res
[j
] = V_BITS64_UNDEFINED
;
4841 /* Slow case: some block of 8 bytes are not all-defined or
4843 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2
);
4844 mc_LOADV_128_or_256_slow( res
, a
, nBits
, isBigEndian
);
4853 VG_REGPARM(2) void MC_(helperc_LOADV256be
) ( /*OUT*/V256
* res
, Addr a
)
4855 mc_LOADV_128_or_256(&res
->w64
[0], a
, 256, True
);
4857 VG_REGPARM(2) void MC_(helperc_LOADV256le
) ( /*OUT*/V256
* res
, Addr a
)
4859 mc_LOADV_128_or_256(&res
->w64
[0], a
, 256, False
);
4862 VG_REGPARM(2) void MC_(helperc_LOADV128be
) ( /*OUT*/V128
* res
, Addr a
)
4864 mc_LOADV_128_or_256(&res
->w64
[0], a
, 128, True
);
4866 VG_REGPARM(2) void MC_(helperc_LOADV128le
) ( /*OUT*/V128
* res
, Addr a
)
4868 mc_LOADV_128_or_256(&res
->w64
[0], a
, 128, False
);
4871 /*------------------------------------------------------------*/
4873 /*------------------------------------------------------------*/
4876 ULong
mc_LOADV64 ( Addr a
, Bool isBigEndian
)
4878 PROF_EVENT(MCPE_LOADV64
);
4880 #ifndef PERF_FAST_LOADV
4881 return mc_LOADVn_slow( a
, 64, isBigEndian
);
4884 UWord sm_off16
, vabits16
;
4887 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,64) )) {
4888 PROF_EVENT(MCPE_LOADV64_SLOW1
);
4889 return (ULong
)mc_LOADVn_slow( a
, 64, isBigEndian
);
4892 sm
= get_secmap_for_reading_low(a
);
4893 sm_off16
= SM_OFF_16(a
);
4894 vabits16
= sm
->vabits16
[sm_off16
];
4896 // Handle common case quickly: a is suitably aligned, is mapped, and
4898 // Convert V bits from compact memory form to expanded register form.
4899 if (LIKELY(vabits16
== VA_BITS16_DEFINED
)) {
4900 return V_BITS64_DEFINED
;
4901 } else if (LIKELY(vabits16
== VA_BITS16_UNDEFINED
)) {
4902 return V_BITS64_UNDEFINED
;
4904 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4905 PROF_EVENT(MCPE_LOADV64_SLOW2
);
4906 return mc_LOADVn_slow( a
, 64, isBigEndian
);
4912 // Generic for all platforms
4913 VG_REGPARM(1) ULong
MC_(helperc_LOADV64be
) ( Addr a
)
4915 return mc_LOADV64(a
, True
);
4918 // Non-generic assembly for arm32-linux
4919 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4920 && defined(VGP_arm_linux)
4921 /* See mc_main_asm.c */
4923 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4924 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
4925 /* See mc_main_asm.c */
4928 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
4929 VG_REGPARM(1) ULong
MC_(helperc_LOADV64le
) ( Addr a
)
4931 return mc_LOADV64(a
, False
);
4935 /*------------------------------------------------------------*/
4936 /*--- STOREV64 ---*/
4937 /*------------------------------------------------------------*/
4940 void mc_STOREV64 ( Addr a
, ULong vbits64
, Bool isBigEndian
)
4942 PROF_EVENT(MCPE_STOREV64
);
4944 #ifndef PERF_FAST_STOREV
4945 // XXX: this slow case seems to be marginally faster than the fast case!
4946 // Investigate further.
4947 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
4950 UWord sm_off16
, vabits16
;
4953 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,64) )) {
4954 PROF_EVENT(MCPE_STOREV64_SLOW1
);
4955 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
4959 sm
= get_secmap_for_reading_low(a
);
4960 sm_off16
= SM_OFF_16(a
);
4961 vabits16
= sm
->vabits16
[sm_off16
];
4963 // To understand the below cleverness, see the extensive comments
4964 // in MC_(helperc_STOREV8).
4965 if (LIKELY(V_BITS64_DEFINED
== vbits64
)) {
4966 if (LIKELY(vabits16
== (UShort
)VA_BITS16_DEFINED
)) {
4969 if (!is_distinguished_sm(sm
) && VA_BITS16_UNDEFINED
== vabits16
) {
4970 sm
->vabits16
[sm_off16
] = VA_BITS16_DEFINED
;
4973 PROF_EVENT(MCPE_STOREV64_SLOW2
);
4974 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
4977 if (V_BITS64_UNDEFINED
== vbits64
) {
4978 if (vabits16
== (UShort
)VA_BITS16_UNDEFINED
) {
4981 if (!is_distinguished_sm(sm
) && VA_BITS16_DEFINED
== vabits16
) {
4982 sm
->vabits16
[sm_off16
] = VA_BITS16_UNDEFINED
;
4985 PROF_EVENT(MCPE_STOREV64_SLOW3
);
4986 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
4990 PROF_EVENT(MCPE_STOREV64_SLOW4
);
4991 mc_STOREVn_slow( a
, 64, vbits64
, isBigEndian
);
4996 VG_REGPARM(1) void MC_(helperc_STOREV64be
) ( Addr a
, ULong vbits64
)
4998 mc_STOREV64(a
, vbits64
, True
);
5000 VG_REGPARM(1) void MC_(helperc_STOREV64le
) ( Addr a
, ULong vbits64
)
5002 mc_STOREV64(a
, vbits64
, False
);
5005 /*------------------------------------------------------------*/
5007 /*------------------------------------------------------------*/
5010 UWord
mc_LOADV32 ( Addr a
, Bool isBigEndian
)
5012 PROF_EVENT(MCPE_LOADV32
);
5014 #ifndef PERF_FAST_LOADV
5015 return (UWord
)mc_LOADVn_slow( a
, 32, isBigEndian
);
5018 UWord sm_off
, vabits8
;
5021 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,32) )) {
5022 PROF_EVENT(MCPE_LOADV32_SLOW1
);
5023 return (UWord
)mc_LOADVn_slow( a
, 32, isBigEndian
);
5026 sm
= get_secmap_for_reading_low(a
);
5028 vabits8
= sm
->vabits8
[sm_off
];
5030 // Handle common case quickly: a is suitably aligned, is mapped, and the
5031 // entire word32 it lives in is addressible.
5032 // Convert V bits from compact memory form to expanded register form.
5033 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
5034 // Almost certainly not necessary, but be paranoid.
5035 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) {
5036 return ((UWord
)0xFFFFFFFF00000000ULL
| (UWord
)V_BITS32_DEFINED
);
5037 } else if (LIKELY(vabits8
== VA_BITS8_UNDEFINED
)) {
5038 return ((UWord
)0xFFFFFFFF00000000ULL
| (UWord
)V_BITS32_UNDEFINED
);
5040 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
5041 PROF_EVENT(MCPE_LOADV32_SLOW2
);
5042 return (UWord
)mc_LOADVn_slow( a
, 32, isBigEndian
);
5048 // Generic for all platforms
5049 VG_REGPARM(1) UWord
MC_(helperc_LOADV32be
) ( Addr a
)
5051 return mc_LOADV32(a
, True
);
5054 // Non-generic assembly for arm32-linux
5055 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5056 && defined(VGP_arm_linux)
5057 /* See mc_main_asm.c */
5059 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5060 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5061 /* See mc_main_asm.c */
5064 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5065 VG_REGPARM(1) UWord
MC_(helperc_LOADV32le
) ( Addr a
)
5067 return mc_LOADV32(a
, False
);
5071 /*------------------------------------------------------------*/
5072 /*--- STOREV32 ---*/
5073 /*------------------------------------------------------------*/
5076 void mc_STOREV32 ( Addr a
, UWord vbits32
, Bool isBigEndian
)
5078 PROF_EVENT(MCPE_STOREV32
);
5080 #ifndef PERF_FAST_STOREV
5081 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5084 UWord sm_off
, vabits8
;
5087 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,32) )) {
5088 PROF_EVENT(MCPE_STOREV32_SLOW1
);
5089 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5093 sm
= get_secmap_for_reading_low(a
);
5095 vabits8
= sm
->vabits8
[sm_off
];
5097 // To understand the below cleverness, see the extensive comments
5098 // in MC_(helperc_STOREV8).
5099 if (LIKELY(V_BITS32_DEFINED
== vbits32
)) {
5100 if (LIKELY(vabits8
== (UInt
)VA_BITS8_DEFINED
)) {
5103 if (!is_distinguished_sm(sm
) && VA_BITS8_UNDEFINED
== vabits8
) {
5104 sm
->vabits8
[sm_off
] = (UInt
)VA_BITS8_DEFINED
;
5107 PROF_EVENT(MCPE_STOREV32_SLOW2
);
5108 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5111 if (V_BITS32_UNDEFINED
== vbits32
) {
5112 if (vabits8
== (UInt
)VA_BITS8_UNDEFINED
) {
5115 if (!is_distinguished_sm(sm
) && VA_BITS8_DEFINED
== vabits8
) {
5116 sm
->vabits8
[sm_off
] = (UInt
)VA_BITS8_UNDEFINED
;
5119 PROF_EVENT(MCPE_STOREV32_SLOW3
);
5120 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5124 PROF_EVENT(MCPE_STOREV32_SLOW4
);
5125 mc_STOREVn_slow( a
, 32, (ULong
)vbits32
, isBigEndian
);
5130 VG_REGPARM(2) void MC_(helperc_STOREV32be
) ( Addr a
, UWord vbits32
)
5132 mc_STOREV32(a
, vbits32
, True
);
5134 VG_REGPARM(2) void MC_(helperc_STOREV32le
) ( Addr a
, UWord vbits32
)
5136 mc_STOREV32(a
, vbits32
, False
);
5139 /*------------------------------------------------------------*/
5141 /*------------------------------------------------------------*/
5144 UWord
mc_LOADV16 ( Addr a
, Bool isBigEndian
)
5146 PROF_EVENT(MCPE_LOADV16
);
5148 #ifndef PERF_FAST_LOADV
5149 return (UWord
)mc_LOADVn_slow( a
, 16, isBigEndian
);
5152 UWord sm_off
, vabits8
;
5155 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,16) )) {
5156 PROF_EVENT(MCPE_LOADV16_SLOW1
);
5157 return (UWord
)mc_LOADVn_slow( a
, 16, isBigEndian
);
5160 sm
= get_secmap_for_reading_low(a
);
5162 vabits8
= sm
->vabits8
[sm_off
];
5163 // Handle common case quickly: a is suitably aligned, is mapped, and is
5165 // Convert V bits from compact memory form to expanded register form
5166 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) { return V_BITS16_DEFINED
; }
5167 else if (LIKELY(vabits8
== VA_BITS8_UNDEFINED
)) { return V_BITS16_UNDEFINED
; }
5169 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5170 // the two sub-bytes.
5171 UChar vabits4
= extract_vabits4_from_vabits8(a
, vabits8
);
5172 if (vabits4
== VA_BITS4_DEFINED
) { return V_BITS16_DEFINED
; }
5173 else if (vabits4
== VA_BITS4_UNDEFINED
) { return V_BITS16_UNDEFINED
; }
5175 /* Slow case: the two bytes are not all-defined or all-undefined. */
5176 PROF_EVENT(MCPE_LOADV16_SLOW2
);
5177 return (UWord
)mc_LOADVn_slow( a
, 16, isBigEndian
);
5184 // Generic for all platforms
5185 VG_REGPARM(1) UWord
MC_(helperc_LOADV16be
) ( Addr a
)
5187 return mc_LOADV16(a
, True
);
5190 // Non-generic assembly for arm32-linux
5191 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5192 && defined(VGP_arm_linux)
5193 __asm__( /* Derived from NCode template */
5196 ".global vgMemCheck_helperc_LOADV16le \n"
5197 ".type vgMemCheck_helperc_LOADV16le, %function \n"
5198 "vgMemCheck_helperc_LOADV16le: \n" //
5200 " bne .LLV16LEc12 \n" // if misaligned
5201 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5202 " movw r3, #:lower16:primary_map \n" //
5203 " uxth r1, r0 \n" // r1 = sec-map-offB
5204 " movt r3, #:upper16:primary_map \n" //
5205 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5206 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5207 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5208 " bne .LLV16LEc0 \n" // no, goto .LLV16LEc0
5210 " mov r0, #0xFFFFFFFF \n" //
5211 " lsl r0, r0, #16 \n" // V_BITS16_DEFINED | top16safe
5214 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5215 " bne .LLV16LEc4 \n" //
5217 " mov r0, #0xFFFFFFFF \n" // V_BITS16_UNDEFINED | top16safe
5220 // r1 holds sec-map-VABITS8. r0 holds the address and is 2-aligned.
5221 // Extract the relevant 4 bits and inspect.
5222 " and r2, r0, #2 \n" // addr & 2
5223 " add r2, r2, r2 \n" // 2 * (addr & 2)
5224 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 2))
5225 " and r1, r1, #15 \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
5227 " cmp r1, #0xA \n" // VA_BITS4_DEFINED
5228 " beq .LLV16LEh9 \n" //
5230 " cmp r1, #0x5 \n" // VA_BITS4_UNDEFINED
5231 " beq .LLV16LEc2 \n" //
5233 ".LLV16LEc12: \n" //
5234 " push {r4, lr} \n" //
5236 " mov r1, #16 \n" //
5237 " bl mc_LOADVn_slow \n" //
5238 " pop {r4, pc} \n" //
5239 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5243 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5244 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5248 ".global vgMemCheck_helperc_LOADV16le\n"
5249 ".type vgMemCheck_helperc_LOADV16le, @function\n"
5250 "vgMemCheck_helperc_LOADV16le:\n"
5251 " test $0x1, %eax\n"
5252 " jne .LLV16LE5\n" /* jump if not aligned */
5254 " shr $0x10, %edx\n"
5255 " mov primary_map(,%edx,4), %ecx\n"
5256 " movzwl %ax, %edx\n"
5258 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5259 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5260 " jne .LLV16LE2\n" /* jump if not all 32bits defined */
5262 " mov $0xffff0000,%eax\n" /* V_BITS16_DEFINED | top16safe */
5265 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5266 " jne .LLV16LE4\n" /* jump if not all 32bits undefined */
5268 " or $0xffffffff,%eax\n" /* V_BITS16_UNDEFINED | top16safe */
5277 " je .LLV16LE1\n" /* jump if all 16bits are defined */
5279 " je .LLV16LE3\n" /* jump if all 16bits are undefined */
5281 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 16, 0) */
5283 " jmp mc_LOADVn_slow\n"
5284 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5289 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5290 VG_REGPARM(1) UWord
MC_(helperc_LOADV16le
) ( Addr a
)
5292 return mc_LOADV16(a
, False
);
5296 /*------------------------------------------------------------*/
5297 /*--- STOREV16 ---*/
5298 /*------------------------------------------------------------*/
5300 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
5302 Bool
accessible_vabits4_in_vabits8 ( Addr a
, UChar vabits8
)
5305 tl_assert(VG_IS_2_ALIGNED(a
)); // Must be 2-aligned
5306 shift
= (a
& 2) << 1; // shift by 0 or 4
5307 vabits8
>>= shift
; // shift the four bits to the bottom
5308 // check 2 x vabits2 != VA_BITS2_NOACCESS
5309 return ((0x3 & vabits8
) != VA_BITS2_NOACCESS
)
5310 && ((0xc & vabits8
) != VA_BITS2_NOACCESS
<< 2);
5314 void mc_STOREV16 ( Addr a
, UWord vbits16
, Bool isBigEndian
)
5316 PROF_EVENT(MCPE_STOREV16
);
5318 #ifndef PERF_FAST_STOREV
5319 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5322 UWord sm_off
, vabits8
;
5325 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,16) )) {
5326 PROF_EVENT(MCPE_STOREV16_SLOW1
);
5327 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5331 sm
= get_secmap_for_reading_low(a
);
5333 vabits8
= sm
->vabits8
[sm_off
];
5335 // To understand the below cleverness, see the extensive comments
5336 // in MC_(helperc_STOREV8).
5337 if (LIKELY(V_BITS16_DEFINED
== vbits16
)) {
5338 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) {
5341 if (!is_distinguished_sm(sm
)
5342 && accessible_vabits4_in_vabits8(a
, vabits8
)) {
5343 insert_vabits4_into_vabits8( a
, VA_BITS4_DEFINED
,
5344 &(sm
->vabits8
[sm_off
]) );
5347 PROF_EVENT(MCPE_STOREV16_SLOW2
);
5348 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5350 if (V_BITS16_UNDEFINED
== vbits16
) {
5351 if (vabits8
== VA_BITS8_UNDEFINED
) {
5354 if (!is_distinguished_sm(sm
)
5355 && accessible_vabits4_in_vabits8(a
, vabits8
)) {
5356 insert_vabits4_into_vabits8( a
, VA_BITS4_UNDEFINED
,
5357 &(sm
->vabits8
[sm_off
]) );
5360 PROF_EVENT(MCPE_STOREV16_SLOW3
);
5361 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5365 PROF_EVENT(MCPE_STOREV16_SLOW4
);
5366 mc_STOREVn_slow( a
, 16, (ULong
)vbits16
, isBigEndian
);
5372 VG_REGPARM(2) void MC_(helperc_STOREV16be
) ( Addr a
, UWord vbits16
)
5374 mc_STOREV16(a
, vbits16
, True
);
5376 VG_REGPARM(2) void MC_(helperc_STOREV16le
) ( Addr a
, UWord vbits16
)
5378 mc_STOREV16(a
, vbits16
, False
);
5381 /*------------------------------------------------------------*/
5383 /*------------------------------------------------------------*/
5385 /* Note: endianness is irrelevant for size == 1 */
5387 // Non-generic assembly for arm32-linux
5388 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5389 && defined(VGP_arm_linux)
5390 __asm__( /* Derived from NCode template */
5393 ".global vgMemCheck_helperc_LOADV8 \n"
5394 ".type vgMemCheck_helperc_LOADV8, %function \n"
5395 "vgMemCheck_helperc_LOADV8: \n" //
5396 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5397 " movw r3, #:lower16:primary_map \n" //
5398 " uxth r1, r0 \n" // r1 = sec-map-offB
5399 " movt r3, #:upper16:primary_map \n" //
5400 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5401 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5402 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5403 " bne .LLV8c0 \n" // no, goto .LLV8c0
5405 " mov r0, #0xFFFFFF00 \n" // V_BITS8_DEFINED | top24safe
5408 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5409 " bne .LLV8c4 \n" //
5411 " mov r0, #0xFFFFFFFF \n" // V_BITS8_UNDEFINED | top24safe
5414 // r1 holds sec-map-VABITS8
5415 // r0 holds the address. Extract the relevant 2 bits and inspect.
5416 " and r2, r0, #3 \n" // addr & 3
5417 " add r2, r2, r2 \n" // 2 * (addr & 3)
5418 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5419 " and r1, r1, #3 \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5421 " cmp r1, #2 \n" // VA_BITS2_DEFINED
5422 " beq .LLV8h9 \n" //
5424 " cmp r1, #1 \n" // VA_BITS2_UNDEFINED
5425 " beq .LLV8c2 \n" //
5427 " push {r4, lr} \n" //
5430 " bl mc_LOADVn_slow \n" //
5431 " pop {r4, pc} \n" //
5432 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5436 /* Non-generic assembly for x86-linux */
5437 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5438 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5442 ".global vgMemCheck_helperc_LOADV8\n"
5443 ".type vgMemCheck_helperc_LOADV8, @function\n"
5444 "vgMemCheck_helperc_LOADV8:\n"
5446 " shr $0x10, %edx\n"
5447 " mov primary_map(,%edx,4), %ecx\n"
5448 " movzwl %ax, %edx\n"
5450 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5451 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED? */
5452 " jne .LLV8LE2\n" /* jump if not defined */
5454 " mov $0xffffff00, %eax\n" /* V_BITS8_DEFINED | top24safe */
5457 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5458 " jne .LLV8LE4\n" /* jump if not all 32bits are undefined */
5460 " or $0xffffffff, %eax\n" /* V_BITS8_UNDEFINED | top24safe */
5469 " je .LLV8LE1\n" /* jump if all 8bits are defined */
5471 " je .LLV8LE3\n" /* jump if all 8bits are undefined */
5472 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 8, 0) */
5474 " jmp mc_LOADVn_slow\n"
5475 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5480 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5482 UWord
MC_(helperc_LOADV8
) ( Addr a
)
5484 PROF_EVENT(MCPE_LOADV8
);
5486 #ifndef PERF_FAST_LOADV
5487 return (UWord
)mc_LOADVn_slow( a
, 8, False
/*irrelevant*/ );
5490 UWord sm_off
, vabits8
;
5493 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,8) )) {
5494 PROF_EVENT(MCPE_LOADV8_SLOW1
);
5495 return (UWord
)mc_LOADVn_slow( a
, 8, False
/*irrelevant*/ );
5498 sm
= get_secmap_for_reading_low(a
);
5500 vabits8
= sm
->vabits8
[sm_off
];
5501 // Convert V bits from compact memory form to expanded register form
5502 // Handle common case quickly: a is mapped, and the entire
5503 // word32 it lives in is addressible.
5504 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) { return V_BITS8_DEFINED
; }
5505 else if (LIKELY(vabits8
== VA_BITS8_UNDEFINED
)) { return V_BITS8_UNDEFINED
; }
5507 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5509 UChar vabits2
= extract_vabits2_from_vabits8(a
, vabits8
);
5510 if (vabits2
== VA_BITS2_DEFINED
) { return V_BITS8_DEFINED
; }
5511 else if (vabits2
== VA_BITS2_UNDEFINED
) { return V_BITS8_UNDEFINED
; }
5513 /* Slow case: the byte is not all-defined or all-undefined. */
5514 PROF_EVENT(MCPE_LOADV8_SLOW2
);
5515 return (UWord
)mc_LOADVn_slow( a
, 8, False
/*irrelevant*/ );
5523 /*------------------------------------------------------------*/
5525 /*------------------------------------------------------------*/
5528 void MC_(helperc_STOREV8
) ( Addr a
, UWord vbits8
)
5530 PROF_EVENT(MCPE_STOREV8
);
5532 #ifndef PERF_FAST_STOREV
5533 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5536 UWord sm_off
, vabits8
;
5539 if (UNLIKELY( UNALIGNED_OR_HIGH(a
,8) )) {
5540 PROF_EVENT(MCPE_STOREV8_SLOW1
);
5541 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5545 sm
= get_secmap_for_reading_low(a
);
5547 vabits8
= sm
->vabits8
[sm_off
];
5549 // Clevernesses to speed up storing V bits.
5550 // The 64/32/16 bit cases also have similar clevernesses, but it
5551 // works a little differently to the code below.
5553 // Cleverness 1: sometimes we don't have to write the shadow memory at
5554 // all, if we can tell that what we want to write is the same as what is
5555 // already there. These cases are marked below as "defined on defined" and
5556 // "undefined on undefined".
5559 // We also avoid to call mc_STOREVn_slow if the V bits can directly
5560 // be written in the secondary map. V bits can be directly written
5561 // if 4 conditions are respected:
5562 // * The address for which V bits are written is naturally aligned
5563 // on 1 byte for STOREV8 (this is always true)
5564 // on 2 bytes for STOREV16
5565 // on 4 bytes for STOREV32
5566 // on 8 bytes for STOREV64.
5567 // * V bits being written are either fully defined or fully undefined.
5568 // (for partially defined V bits, V bits cannot be directly written,
5569 // as the secondary vbits table must be maintained).
5570 // * the secmap is not distinguished (distinguished maps cannot be
5572 // * the memory corresponding to the V bits being written is
5573 // accessible (if one or more bytes are not accessible,
5574 // we must call mc_STOREVn_slow in order to report accessibility
5576 // Note that for STOREV32 and STOREV64, it is too expensive
5577 // to verify the accessibility of each byte for the benefit it
5578 // brings. Instead, a quicker check is done by comparing to
5579 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
5580 // but misses some opportunity of direct modifications.
5581 // Checking each byte accessibility was measured for
5582 // STOREV32+perf tests and was slowing down all perf tests.
5583 // The cases corresponding to cleverness 2 are marked below as
5585 if (LIKELY(V_BITS8_DEFINED
== vbits8
)) {
5586 if (LIKELY(vabits8
== VA_BITS8_DEFINED
)) {
5587 return; // defined on defined
5589 if (!is_distinguished_sm(sm
)
5590 && VA_BITS2_NOACCESS
!= extract_vabits2_from_vabits8(a
, vabits8
)) {
5592 insert_vabits2_into_vabits8( a
, VA_BITS2_DEFINED
,
5593 &(sm
->vabits8
[sm_off
]) );
5596 PROF_EVENT(MCPE_STOREV8_SLOW2
);
5597 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5600 if (V_BITS8_UNDEFINED
== vbits8
) {
5601 if (vabits8
== VA_BITS8_UNDEFINED
) {
5602 return; // undefined on undefined
5604 if (!is_distinguished_sm(sm
)
5605 && (VA_BITS2_NOACCESS
5606 != extract_vabits2_from_vabits8(a
, vabits8
))) {
5608 insert_vabits2_into_vabits8( a
, VA_BITS2_UNDEFINED
,
5609 &(sm
->vabits8
[sm_off
]) );
5612 PROF_EVENT(MCPE_STOREV8_SLOW3
);
5613 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5617 // Partially defined word
5618 PROF_EVENT(MCPE_STOREV8_SLOW4
);
5619 mc_STOREVn_slow( a
, 8, (ULong
)vbits8
, False
/*irrelevant*/ );
5625 /*------------------------------------------------------------*/
5626 /*--- Functions called directly from generated code: ---*/
5627 /*--- Value-check failure handlers. ---*/
5628 /*------------------------------------------------------------*/
5630 /* Call these ones when an origin is available ... */
5632 void MC_(helperc_value_check0_fail_w_o
) ( UWord origin
) {
5633 MC_(record_cond_error
) ( VG_(get_running_tid
)(), (UInt
)origin
);
5637 void MC_(helperc_value_check1_fail_w_o
) ( UWord origin
) {
5638 MC_(record_value_error
) ( VG_(get_running_tid
)(), 1, (UInt
)origin
);
5642 void MC_(helperc_value_check4_fail_w_o
) ( UWord origin
) {
5643 MC_(record_value_error
) ( VG_(get_running_tid
)(), 4, (UInt
)origin
);
5647 void MC_(helperc_value_check8_fail_w_o
) ( UWord origin
) {
5648 MC_(record_value_error
) ( VG_(get_running_tid
)(), 8, (UInt
)origin
);
5652 void MC_(helperc_value_checkN_fail_w_o
) ( HWord sz
, UWord origin
) {
5653 MC_(record_value_error
) ( VG_(get_running_tid
)(), (Int
)sz
, (UInt
)origin
);
5656 /* ... and these when an origin isn't available. */
5659 void MC_(helperc_value_check0_fail_no_o
) ( void ) {
5660 MC_(record_cond_error
) ( VG_(get_running_tid
)(), 0/*origin*/ );
5664 void MC_(helperc_value_check1_fail_no_o
) ( void ) {
5665 MC_(record_value_error
) ( VG_(get_running_tid
)(), 1, 0/*origin*/ );
5669 void MC_(helperc_value_check4_fail_no_o
) ( void ) {
5670 MC_(record_value_error
) ( VG_(get_running_tid
)(), 4, 0/*origin*/ );
5674 void MC_(helperc_value_check8_fail_no_o
) ( void ) {
5675 MC_(record_value_error
) ( VG_(get_running_tid
)(), 8, 0/*origin*/ );
5679 void MC_(helperc_value_checkN_fail_no_o
) ( HWord sz
) {
5680 MC_(record_value_error
) ( VG_(get_running_tid
)(), (Int
)sz
, 0/*origin*/ );
5684 /*------------------------------------------------------------*/
5685 /*--- Metadata get/set functions, for client requests. ---*/
5686 /*------------------------------------------------------------*/
5688 // Nb: this expands the V+A bits out into register-form V bits, even though
5689 // they're in memory. This is for backward compatibility, and because it's
5690 // probably what the user wants.
5692 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5693 error [no longer used], 3 == addressing error. */
5694 /* Nb: We used to issue various definedness/addressability errors from here,
5695 but we took them out because they ranged from not-very-helpful to
5696 downright annoying, and they complicated the error data structures. */
5697 static Int
mc_get_or_set_vbits_for_client (
5701 Bool setting
, /* True <=> set vbits, False <=> get vbits */
5702 Bool is_client_request
/* True <=> real user request
5703 False <=> internal call from gdbserver */
5710 /* Check that arrays are addressible before doing any getting/setting.
5711 vbits to be checked only for real user request. */
5712 for (i
= 0; i
< szB
; i
++) {
5713 if (VA_BITS2_NOACCESS
== get_vabits2(a
+ i
) ||
5714 (is_client_request
&& VA_BITS2_NOACCESS
== get_vabits2(vbits
+ i
))) {
5722 for (i
= 0; i
< szB
; i
++) {
5723 ok
= set_vbits8(a
+ i
, ((UChar
*)vbits
)[i
]);
5728 for (i
= 0; i
< szB
; i
++) {
5729 ok
= get_vbits8(a
+ i
, &vbits8
);
5731 ((UChar
*)vbits
)[i
] = vbits8
;
5733 if (is_client_request
)
5734 // The bytes in vbits[] have now been set, so mark them as such.
5735 MC_(make_mem_defined
)(vbits
, szB
);
5742 /*------------------------------------------------------------*/
5743 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
5744 /*------------------------------------------------------------*/
5746 /* For the memory leak detector, say whether an entire 64k chunk of
5747 address space is possibly in use, or not. If in doubt return
5750 Bool
MC_(is_within_valid_secondary
) ( Addr a
)
5752 SecMap
* sm
= maybe_get_secmap_for ( a
);
5753 if (sm
== NULL
|| sm
== &sm_distinguished
[SM_DIST_NOACCESS
]) {
5754 /* Definitely not in use. */
5762 /* For the memory leak detector, say whether or not a given word
5763 address is to be regarded as valid. */
5764 Bool
MC_(is_valid_aligned_word
) ( Addr a
)
5766 tl_assert(sizeof(UWord
) == 4 || sizeof(UWord
) == 8);
5767 tl_assert(VG_IS_WORD_ALIGNED(a
));
5768 if (get_vabits8_for_aligned_word32 (a
) != VA_BITS8_DEFINED
)
5770 if (sizeof(UWord
) == 8) {
5771 if (get_vabits8_for_aligned_word32 (a
+ 4) != VA_BITS8_DEFINED
)
5774 if (UNLIKELY(MC_(in_ignored_range
)(a
)))
5781 /*------------------------------------------------------------*/
5782 /*--- Initialisation ---*/
5783 /*------------------------------------------------------------*/
5785 static void init_shadow_memory ( void )
5790 tl_assert(V_BIT_UNDEFINED
== 1);
5791 tl_assert(V_BIT_DEFINED
== 0);
5792 tl_assert(V_BITS8_UNDEFINED
== 0xFF);
5793 tl_assert(V_BITS8_DEFINED
== 0);
5795 /* Build the 3 distinguished secondaries */
5796 sm
= &sm_distinguished
[SM_DIST_NOACCESS
];
5797 for (i
= 0; i
< SM_CHUNKS
; i
++) sm
->vabits8
[i
] = VA_BITS8_NOACCESS
;
5799 sm
= &sm_distinguished
[SM_DIST_UNDEFINED
];
5800 for (i
= 0; i
< SM_CHUNKS
; i
++) sm
->vabits8
[i
] = VA_BITS8_UNDEFINED
;
5802 sm
= &sm_distinguished
[SM_DIST_DEFINED
];
5803 for (i
= 0; i
< SM_CHUNKS
; i
++) sm
->vabits8
[i
] = VA_BITS8_DEFINED
;
5805 /* Set up the primary map. */
5806 /* These entries gradually get overwritten as the used address
5808 for (i
= 0; i
< N_PRIMARY_MAP
; i
++)
5809 primary_map
[i
] = &sm_distinguished
[SM_DIST_NOACCESS
];
5811 /* Auxiliary primary maps */
5812 init_auxmap_L1_L2();
5814 /* auxmap_size = auxmap_used = 0;
5815 no ... these are statically initialised */
5817 /* Secondary V bit table */
5818 secVBitTable
= createSecVBitTable();
5822 /*------------------------------------------------------------*/
5823 /*--- Sanity check machinery (permanently engaged) ---*/
5824 /*------------------------------------------------------------*/
5826 static Bool
mc_cheap_sanity_check ( void )
5829 PROF_EVENT(MCPE_CHEAP_SANITY_CHECK
);
5830 /* Check for sane operating level */
5831 if (MC_(clo_mc_level
) < 1 || MC_(clo_mc_level
) > 3)
5833 /* nothing else useful we can rapidly check */
5837 static Bool
mc_expensive_sanity_check ( void )
5840 Word n_secmaps_found
;
5842 const HChar
* errmsg
;
5845 if (0) VG_(printf
)("expensive sanity check\n");
5848 n_sanity_expensive
++;
5849 PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK
);
5851 /* Check for sane operating level */
5852 if (MC_(clo_mc_level
) < 1 || MC_(clo_mc_level
) > 3)
5855 /* Check that the 3 distinguished SMs are still as they should be. */
5857 /* Check noaccess DSM. */
5858 sm
= &sm_distinguished
[SM_DIST_NOACCESS
];
5859 for (i
= 0; i
< SM_CHUNKS
; i
++)
5860 if (sm
->vabits8
[i
] != VA_BITS8_NOACCESS
)
5863 /* Check undefined DSM. */
5864 sm
= &sm_distinguished
[SM_DIST_UNDEFINED
];
5865 for (i
= 0; i
< SM_CHUNKS
; i
++)
5866 if (sm
->vabits8
[i
] != VA_BITS8_UNDEFINED
)
5869 /* Check defined DSM. */
5870 sm
= &sm_distinguished
[SM_DIST_DEFINED
];
5871 for (i
= 0; i
< SM_CHUNKS
; i
++)
5872 if (sm
->vabits8
[i
] != VA_BITS8_DEFINED
)
5876 VG_(printf
)("memcheck expensive sanity: "
5877 "distinguished_secondaries have changed\n");
5881 /* If we're not checking for undefined value errors, the secondary V bit
5882 * table should be empty. */
5883 if (MC_(clo_mc_level
) == 1) {
5884 if (0 != VG_(OSetGen_Size
)(secVBitTable
))
5888 /* check the auxiliary maps, very thoroughly */
5889 n_secmaps_found
= 0;
5890 errmsg
= check_auxmap_L1_L2_sanity( &n_secmaps_found
);
5892 VG_(printf
)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg
);
5896 /* n_secmaps_found is now the number referred to by the auxiliary
5897 primary map. Now add on the ones referred to by the main
5899 for (i
= 0; i
< N_PRIMARY_MAP
; i
++) {
5900 if (primary_map
[i
] == NULL
) {
5903 if (!is_distinguished_sm(primary_map
[i
]))
5908 /* check that the number of secmaps issued matches the number that
5909 are reachable (iow, no secmap leaks) */
5910 if (n_secmaps_found
!= (n_issued_SMs
- n_deissued_SMs
))
5914 VG_(printf
)("memcheck expensive sanity: "
5915 "apparent secmap leakage\n");
5920 VG_(printf
)("memcheck expensive sanity: "
5921 "auxmap covers wrong address space\n");
5925 /* there is only one pointer to each secmap (expensive) */
5930 /*------------------------------------------------------------*/
5931 /*--- Command line args ---*/
5932 /*------------------------------------------------------------*/
5934 /* 31 Aug 2015: Vectorised code is now so widespread that
5935 --partial-loads-ok needs to be enabled by default on all platforms.
5936 Not doing so causes lots of false errors. */
5937 Bool
MC_(clo_partial_loads_ok
) = True
;
5938 Long
MC_(clo_freelist_vol
) = 20*1000*1000LL;
5939 Long
MC_(clo_freelist_big_blocks
) = 1*1000*1000LL;
5940 LeakCheckMode
MC_(clo_leak_check
) = LC_Summary
;
5941 VgRes
MC_(clo_leak_resolution
) = Vg_HighRes
;
5942 UInt
MC_(clo_show_leak_kinds
) = R2S(Possible
) | R2S(Unreached
);
5943 UInt
MC_(clo_error_for_leak_kinds
) = R2S(Possible
) | R2S(Unreached
);
5944 UInt
MC_(clo_leak_check_heuristics
) = H2S(LchStdString
)
5947 | H2S( LchMultipleInheritance
);
5948 Bool
MC_(clo_xtree_leak
) = False
;
5949 const HChar
* MC_(clo_xtree_leak_file
) = "xtleak.kcg.%p";
5950 Bool
MC_(clo_workaround_gcc296_bugs
) = False
;
5951 Int
MC_(clo_malloc_fill
) = -1;
5952 Int
MC_(clo_free_fill
) = -1;
5953 KeepStacktraces
MC_(clo_keep_stacktraces
) = KS_alloc_and_free
;
5954 Int
MC_(clo_mc_level
) = 2;
5955 Bool
MC_(clo_show_mismatched_frees
) = True
;
5957 ExpensiveDefinednessChecks
5958 MC_(clo_expensive_definedness_checks
) = EdcAUTO
;
5960 Bool
MC_(clo_ignore_range_below_sp
) = False
;
5961 UInt
MC_(clo_ignore_range_below_sp__first_offset
) = 0;
5962 UInt
MC_(clo_ignore_range_below_sp__last_offset
) = 0;
5964 static const HChar
* MC_(parse_leak_heuristics_tokens
) =
5965 "-,stdstring,length64,newarray,multipleinheritance";
5966 /* The first heuristic value (LchNone) has no keyword, as this is
5967 a fake heuristic used to collect the blocks found without any
5970 static Bool
mc_process_cmd_line_options(const HChar
* arg
)
5972 const HChar
* tmp_str
;
5975 tl_assert( MC_(clo_mc_level
) >= 1 && MC_(clo_mc_level
) <= 3 );
5977 /* Set MC_(clo_mc_level):
5978 1 = A bit tracking only
5979 2 = A and V bit tracking, but no V bit origins
5980 3 = A and V bit tracking, and V bit origins
5982 Do this by inspecting --undef-value-errors= and
5983 --track-origins=. Reject the case --undef-value-errors=no
5984 --track-origins=yes as meaningless.
5986 if (0 == VG_(strcmp
)(arg
, "--undef-value-errors=no")) {
5987 if (MC_(clo_mc_level
) == 3) {
5990 MC_(clo_mc_level
) = 1;
5994 if (0 == VG_(strcmp
)(arg
, "--undef-value-errors=yes")) {
5995 if (MC_(clo_mc_level
) == 1)
5996 MC_(clo_mc_level
) = 2;
5999 if (0 == VG_(strcmp
)(arg
, "--track-origins=no")) {
6000 if (MC_(clo_mc_level
) == 3)
6001 MC_(clo_mc_level
) = 2;
6004 if (0 == VG_(strcmp
)(arg
, "--track-origins=yes")) {
6005 if (MC_(clo_mc_level
) == 1) {
6008 MC_(clo_mc_level
) = 3;
6013 if VG_BOOL_CLO(arg
, "--partial-loads-ok", MC_(clo_partial_loads_ok
)) {}
6014 else if VG_USET_CLO(arg
, "--errors-for-leak-kinds",
6015 MC_(parse_leak_kinds_tokens
),
6016 MC_(clo_error_for_leak_kinds
)) {}
6017 else if VG_USET_CLO(arg
, "--show-leak-kinds",
6018 MC_(parse_leak_kinds_tokens
),
6019 MC_(clo_show_leak_kinds
)) {}
6020 else if VG_USET_CLO(arg
, "--leak-check-heuristics",
6021 MC_(parse_leak_heuristics_tokens
),
6022 MC_(clo_leak_check_heuristics
)) {}
6023 else if (VG_BOOL_CLO(arg
, "--show-reachable", tmp_show
)) {
6025 MC_(clo_show_leak_kinds
) = MC_(all_Reachedness
)();
6027 MC_(clo_show_leak_kinds
) &= ~R2S(Reachable
);
6030 else if VG_BOOL_CLO(arg
, "--show-possibly-lost", tmp_show
) {
6032 MC_(clo_show_leak_kinds
) |= R2S(Possible
);
6034 MC_(clo_show_leak_kinds
) &= ~R2S(Possible
);
6037 else if VG_BOOL_CLO(arg
, "--workaround-gcc296-bugs",
6038 MC_(clo_workaround_gcc296_bugs
)) {}
6040 else if VG_BINT_CLO(arg
, "--freelist-vol", MC_(clo_freelist_vol
),
6041 0, 10*1000*1000*1000LL) {}
6043 else if VG_BINT_CLO(arg
, "--freelist-big-blocks",
6044 MC_(clo_freelist_big_blocks
),
6045 0, 10*1000*1000*1000LL) {}
6047 else if VG_XACT_CLO(arg
, "--leak-check=no",
6048 MC_(clo_leak_check
), LC_Off
) {}
6049 else if VG_XACT_CLO(arg
, "--leak-check=summary",
6050 MC_(clo_leak_check
), LC_Summary
) {}
6051 else if VG_XACT_CLO(arg
, "--leak-check=yes",
6052 MC_(clo_leak_check
), LC_Full
) {}
6053 else if VG_XACT_CLO(arg
, "--leak-check=full",
6054 MC_(clo_leak_check
), LC_Full
) {}
6056 else if VG_XACT_CLO(arg
, "--leak-resolution=low",
6057 MC_(clo_leak_resolution
), Vg_LowRes
) {}
6058 else if VG_XACT_CLO(arg
, "--leak-resolution=med",
6059 MC_(clo_leak_resolution
), Vg_MedRes
) {}
6060 else if VG_XACT_CLO(arg
, "--leak-resolution=high",
6061 MC_(clo_leak_resolution
), Vg_HighRes
) {}
6063 else if VG_STR_CLO(arg
, "--ignore-ranges", tmp_str
) {
6064 Bool ok
= parse_ignore_ranges(tmp_str
);
6066 VG_(message
)(Vg_DebugMsg
,
6067 "ERROR: --ignore-ranges: "
6068 "invalid syntax, or end <= start in range\n");
6071 if (gIgnoredAddressRanges
) {
6073 for (i
= 0; i
< VG_(sizeRangeMap
)(gIgnoredAddressRanges
); i
++) {
6074 UWord val
= IAR_INVALID
;
6075 UWord key_min
= ~(UWord
)0;
6076 UWord key_max
= (UWord
)0;
6077 VG_(indexRangeMap
)( &key_min
, &key_max
, &val
,
6078 gIgnoredAddressRanges
, i
);
6079 tl_assert(key_min
<= key_max
);
6080 UWord limit
= 0x4000000; /* 64M - entirely arbitrary limit */
6081 if (key_max
- key_min
> limit
&& val
== IAR_CommandLine
) {
6082 VG_(message
)(Vg_DebugMsg
,
6083 "ERROR: --ignore-ranges: suspiciously large range:\n");
6084 VG_(message
)(Vg_DebugMsg
,
6085 " 0x%lx-0x%lx (size %lu)\n", key_min
, key_max
,
6086 key_max
- key_min
+ 1);
6093 else if VG_STR_CLO(arg
, "--ignore-range-below-sp", tmp_str
) {
6094 /* This seems at first a bit weird, but: in order to imply
6095 a non-wrapped-around address range, the first offset needs to be
6096 larger than the second one. For example
6097 --ignore-range-below-sp=8192,8189
6098 would cause accesses to in the range [SP-8192, SP-8189] to be
6100 UInt offs1
= 0, offs2
= 0;
6101 Bool ok
= parse_UInt_pair(&tmp_str
, &offs1
, &offs2
);
6102 // Ensure we used all the text after the '=' sign.
6103 if (ok
&& *tmp_str
!= 0) ok
= False
;
6105 VG_(message
)(Vg_DebugMsg
,
6106 "ERROR: --ignore-range-below-sp: invalid syntax. "
6107 " Expected \"...=decimalnumber-decimalnumber\".\n");
6110 if (offs1
> 1000*1000 /*arbitrary*/ || offs2
> 1000*1000 /*ditto*/) {
6111 VG_(message
)(Vg_DebugMsg
,
6112 "ERROR: --ignore-range-below-sp: suspiciously large "
6113 "offset(s): %u and %u\n", offs1
, offs2
);
6116 if (offs1
<= offs2
) {
6117 VG_(message
)(Vg_DebugMsg
,
6118 "ERROR: --ignore-range-below-sp: invalid offsets "
6119 "(the first must be larger): %u and %u\n", offs1
, offs2
);
6122 tl_assert(offs1
> offs2
);
6123 if (offs1
- offs2
> 4096 /*arbitrary*/) {
6124 VG_(message
)(Vg_DebugMsg
,
6125 "ERROR: --ignore-range-below-sp: suspiciously large "
6126 "range: %u-%u (size %u)\n", offs1
, offs2
, offs1
- offs2
);
6129 MC_(clo_ignore_range_below_sp
) = True
;
6130 MC_(clo_ignore_range_below_sp__first_offset
) = offs1
;
6131 MC_(clo_ignore_range_below_sp__last_offset
) = offs2
;
6135 else if VG_BHEX_CLO(arg
, "--malloc-fill", MC_(clo_malloc_fill
), 0x00,0xFF) {}
6136 else if VG_BHEX_CLO(arg
, "--free-fill", MC_(clo_free_fill
), 0x00,0xFF) {}
6138 else if VG_XACT_CLO(arg
, "--keep-stacktraces=alloc",
6139 MC_(clo_keep_stacktraces
), KS_alloc
) {}
6140 else if VG_XACT_CLO(arg
, "--keep-stacktraces=free",
6141 MC_(clo_keep_stacktraces
), KS_free
) {}
6142 else if VG_XACT_CLO(arg
, "--keep-stacktraces=alloc-and-free",
6143 MC_(clo_keep_stacktraces
), KS_alloc_and_free
) {}
6144 else if VG_XACT_CLO(arg
, "--keep-stacktraces=alloc-then-free",
6145 MC_(clo_keep_stacktraces
), KS_alloc_then_free
) {}
6146 else if VG_XACT_CLO(arg
, "--keep-stacktraces=none",
6147 MC_(clo_keep_stacktraces
), KS_none
) {}
6149 else if VG_BOOL_CLO(arg
, "--show-mismatched-frees",
6150 MC_(clo_show_mismatched_frees
)) {}
6152 else if VG_XACT_CLO(arg
, "--expensive-definedness-checks=no",
6153 MC_(clo_expensive_definedness_checks
), EdcNO
) {}
6154 else if VG_XACT_CLO(arg
, "--expensive-definedness-checks=auto",
6155 MC_(clo_expensive_definedness_checks
), EdcAUTO
) {}
6156 else if VG_XACT_CLO(arg
, "--expensive-definedness-checks=yes",
6157 MC_(clo_expensive_definedness_checks
), EdcYES
) {}
6159 else if VG_BOOL_CLO(arg
, "--xtree-leak",
6160 MC_(clo_xtree_leak
)) {}
6161 else if VG_STR_CLO (arg
, "--xtree-leak-file",
6162 MC_(clo_xtree_leak_file
)) {}
6165 return VG_(replacement_malloc_process_cmd_line_option
)(arg
);
6171 VG_(fmsg_bad_option
)(arg
,
6172 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
6175 static void mc_print_usage(void)
6178 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
6179 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
6180 " --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
6181 " [definite,possible]\n"
6182 " --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
6183 " [definite,possible]\n"
6184 " where kind is one of:\n"
6185 " definite indirect possible reachable all none\n"
6186 " --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
6187 " improving leak search false positive [all]\n"
6188 " where heur is one of:\n"
6189 " stdstring length64 newarray multipleinheritance all none\n"
6190 " --show-reachable=yes same as --show-leak-kinds=all\n"
6191 " --show-reachable=no --show-possibly-lost=yes\n"
6192 " same as --show-leak-kinds=definite,possible\n"
6193 " --show-reachable=no --show-possibly-lost=no\n"
6194 " same as --show-leak-kinds=definite\n"
6195 " --xtree-leak=no|yes output leak result in xtree format? [no]\n"
6196 " --xtree-leak-file=<file> xtree leak report file [xtleak.kcg.%%p]\n"
6197 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
6198 " --track-origins=no|yes show origins of undefined values? [no]\n"
6199 " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
6200 " --expensive-definedness-checks=no|auto|yes\n"
6201 " Use extra-precise definedness tracking [auto]\n"
6202 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
6203 " --freelist-big-blocks=<number> releases first blocks with size>= [1000000]\n"
6204 " --workaround-gcc296-bugs=no|yes self explanatory [no]. Deprecated.\n"
6205 " Use --ignore-range-below-sp instead.\n"
6206 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
6207 " --ignore-range-below-sp=<number>-<number> do not report errors for\n"
6208 " accesses at the given offsets below SP\n"
6209 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
6210 " --free-fill=<hexnumber> fill free'd areas with given value\n"
6211 " --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
6212 " stack trace(s) to keep for malloc'd/free'd areas [alloc-and-free]\n"
6213 " --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
6217 static void mc_print_debug_usage(void)
6225 /*------------------------------------------------------------*/
6226 /*--- Client blocks ---*/
6227 /*------------------------------------------------------------*/
6229 /* Client block management:
6231 This is managed as an expanding array of client block descriptors.
6232 Indices of live descriptors are issued to the client, so it can ask
6233 to free them later. Therefore we cannot slide live entries down
6234 over dead ones. Instead we must use free/inuse flags and scan for
6235 an empty slot at allocation time. This in turn means allocation is
6236 relatively expensive, so we hope this does not happen too often.
6238 An unused block has start == size == 0
6241 /* type CGenBlock is defined in mc_include.h */
6243 /* This subsystem is self-initialising. */
6244 static UWord cgb_size
= 0;
6245 static UWord cgb_used
= 0;
6246 static CGenBlock
* cgbs
= NULL
;
6248 /* Stats for this subsystem. */
6249 static ULong cgb_used_MAX
= 0; /* Max in use. */
6250 static ULong cgb_allocs
= 0; /* Number of allocs. */
6251 static ULong cgb_discards
= 0; /* Number of discards. */
6252 static ULong cgb_search
= 0; /* Number of searches. */
6255 /* Get access to the client block array. */
6256 void MC_(get_ClientBlock_array
)( /*OUT*/CGenBlock
** blocks
,
6257 /*OUT*/UWord
* nBlocks
)
6260 *nBlocks
= cgb_used
;
6265 Int
alloc_client_block ( void )
6268 CGenBlock
* cgbs_new
;
6272 for (i
= 0; i
< cgb_used
; i
++) {
6274 if (cgbs
[i
].start
== 0 && cgbs
[i
].size
== 0)
6278 /* Not found. Try to allocate one at the end. */
6279 if (cgb_used
< cgb_size
) {
6284 /* Ok, we have to allocate a new one. */
6285 tl_assert(cgb_used
== cgb_size
);
6286 sz_new
= (cgbs
== NULL
) ? 10 : (2 * cgb_size
);
6288 cgbs_new
= VG_(malloc
)( "mc.acb.1", sz_new
* sizeof(CGenBlock
) );
6289 for (i
= 0; i
< cgb_used
; i
++)
6290 cgbs_new
[i
] = cgbs
[i
];
6298 if (cgb_used
> cgb_used_MAX
)
6299 cgb_used_MAX
= cgb_used
;
6304 static void show_client_block_stats ( void )
6306 VG_(message
)(Vg_DebugMsg
,
6307 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6308 cgb_allocs
, cgb_discards
, cgb_used_MAX
, cgb_search
6311 static void print_monitor_help ( void )
6316 "memcheck monitor commands:\n"
6317 " xb <addr> [<len>]\n"
6318 " prints validity bits for <len> (or 1) bytes at <addr>\n"
6319 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6320 " Then prints the bytes values below the corresponding validity bits\n"
6321 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6322 " Example: xb 0x8049c78 10\n"
6323 " get_vbits <addr> [<len>]\n"
6324 " Similar to xb, but only prints the validity bytes by group of 4.\n"
6325 " make_memory [noaccess|undefined\n"
6326 " |defined|Definedifaddressable] <addr> [<len>]\n"
6327 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6328 " check_memory [addressable|defined] <addr> [<len>]\n"
6329 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6330 " and outputs a description of <addr>\n"
6331 " leak_check [full*|summary|xtleak]\n"
6332 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6333 " [heuristics heur1,heur2,...]\n"
6334 " [increased*|changed|any]\n"
6335 " [unlimited*|limited <max_loss_records_output>]\n"
6337 " xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
6338 " where kind is one of:\n"
6339 " definite indirect possible reachable all none\n"
6340 " where heur is one of:\n"
6341 " stdstring length64 newarray multipleinheritance all none*\n"
6342 " Examples: leak_check\n"
6343 " leak_check summary any\n"
6344 " leak_check full kinds indirect,possible\n"
6345 " leak_check full reachable any limited 100\n"
6346 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6347 " [unlimited*|limited <max_blocks>]\n"
6348 " [heuristics heur1,heur2,...]\n"
6349 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6350 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6351 " With heuristics, only shows the blocks found via heur1,heur2,...\n"
6353 " who_points_at <addr> [<len>]\n"
6354 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6355 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6356 " with len > 1, will also show \"interior pointers\")\n"
6357 " xtmemory [<filename>]\n"
6358 " dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
6362 /* Print szB bytes at address, with a format similar to the gdb command
6364 res[i] == 1 indicates the corresponding byte is addressable. */
6365 static void gdb_xb (Addr address
, SizeT szB
, Int res
[])
6369 for (i
= 0; i
< szB
; i
++) {
6373 VG_(printf
) ("\n"); // Terminate previous line
6374 VG_(printf
) ("%p:", (void*)(address
+i
));
6377 VG_(printf
) ("\t0x%02x", *(UChar
*)(address
+i
));
6379 VG_(printf
) ("\t0x??");
6381 VG_(printf
) ("\n"); // Terminate previous line
6385 /* Returns the address of the next non space character,
6386 or address of the string terminator. */
6387 static HChar
* next_non_space (HChar
*s
)
6389 while (*s
&& *s
== ' ')
6394 /* Parse an integer slice, i.e. a single integer or a range of integer.
6396 <integer>[..<integer> ]
6397 (spaces are allowed before and/or after ..).
6398 Return True if range correctly parsed, False otherwise. */
6399 static Bool
VG_(parse_slice
) (HChar
* s
, HChar
** saveptr
,
6400 UInt
*from
, UInt
*to
)
6405 wl
= VG_(strtok_r
) (s
, " ", saveptr
);
6407 /* slice must start with an integer. */
6409 VG_(gdb_printf
) ("expecting integer or slice <from>..<to>\n");
6412 *from
= VG_(strtoull10
) (wl
, &endptr
);
6414 VG_(gdb_printf
) ("invalid integer or slice <from>..<to>\n");
6418 if (*endptr
== '\0' && *next_non_space(*saveptr
) != '.') {
6419 /* wl token is an integer terminating the string
6420 or else next token does not start with .
6421 In both cases, the slice is a single integer. */
6426 if (*endptr
== '\0') {
6427 // iii .. => get the next token
6428 wl
= VG_(strtok_r
) (NULL
, " .", saveptr
);
6431 if (*endptr
!= '.' && *(endptr
+1) != '.') {
6432 VG_(gdb_printf
) ("expecting slice <from>..<to>\n");
6435 if ( *(endptr
+2) == ' ') {
6436 // It must be iii.. jjj => get the next token
6437 wl
= VG_(strtok_r
) (NULL
, " .", saveptr
);
6439 // It must be iii..jjj
6444 *to
= VG_(strtoull10
) (wl
, &endptr
);
6445 if (*endptr
!= '\0') {
6446 VG_(gdb_printf
) ("missing/wrong 'to' of slice <from>..<to>\n");
6451 VG_(gdb_printf
) ("<from> cannot be bigger than <to> "
6452 "in slice <from>..<to>\n");
6459 /* return True if request recognised, False otherwise */
6460 static Bool
handle_gdb_monitor_command (ThreadId tid
, HChar
*req
)
6463 HChar s
[VG_(strlen
)(req
) + 1]; /* copy for strtok_r */
6466 VG_(strcpy
) (s
, req
);
6468 wcmd
= VG_(strtok_r
) (s
, " ", &ssaveptr
);
6469 /* NB: if possible, avoid introducing a new command below which
6470 starts with the same first letter(s) as an already existing
6471 command. This ensures a shorter abbreviation for the user. */
6472 switch (VG_(keyword_id
)
6473 ("help get_vbits leak_check make_memory check_memory "
6474 "block_list who_points_at xb xtmemory",
6475 wcmd
, kwd_report_duplicated_matches
)) {
6476 case -2: /* multiple matches */
6478 case -1: /* not found */
6481 print_monitor_help();
6483 case 1: { /* get_vbits */
6486 if (VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
)) {
6489 Int unaddressable
= 0;
6490 for (i
= 0; i
< szB
; i
++) {
6491 Int res
= mc_get_or_set_vbits_for_client
6492 (address
+i
, (Addr
) &vbits
, 1,
6493 False
, /* get them */
6494 False
/* is client request */ );
6495 /* we are before the first character on next line, print a \n. */
6496 if ((i
% 32) == 0 && i
!= 0)
6498 /* we are before the next block of 4 starts, print a space. */
6499 else if ((i
% 4) == 0 && i
!= 0)
6502 VG_(printf
) ("%02x", vbits
);
6504 tl_assert(3 == res
);
6510 if (unaddressable
) {
6512 ("Address %p len %lu has %d bytes unaddressable\n",
6513 (void *)address
, szB
, unaddressable
);
6518 case 2: { /* leak_check */
6520 LeakCheckParams lcp
;
6521 HChar
* xt_filename
= NULL
;
6525 lcp
.show_leak_kinds
= R2S(Possible
) | R2S(Unreached
);
6526 lcp
.errors_for_leak_kinds
= 0; // no errors for interactive leak search.
6528 lcp
.deltamode
= LCD_Increased
;
6529 lcp
.max_loss_records_output
= 999999999;
6530 lcp
.requested_by_monitor_command
= True
;
6531 lcp
.xt_filename
= NULL
;
6533 for (kw
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6535 kw
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
)) {
6536 switch (VG_(keyword_id
)
6537 ("full summary xtleak "
6538 "kinds reachable possibleleak definiteleak "
6540 "increased changed any "
6541 "unlimited limited ",
6542 kw
, kwd_report_all
)) {
6543 case -2: err
++; break;
6544 case -1: err
++; break;
6546 lcp
.mode
= LC_Full
; break;
6547 case 1: /* summary */
6548 lcp
.mode
= LC_Summary
; break;
6549 case 2: /* xtleak */
6552 = VG_(expand_file_name
)("--xtleak-mc_main.c",
6553 "xtleak.kcg.%p.%n");
6554 lcp
.xt_filename
= xt_filename
;
6556 case 3: { /* kinds */
6557 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6559 || !VG_(parse_enum_set
)(MC_(parse_leak_kinds_tokens
),
6562 &lcp
.show_leak_kinds
)) {
6563 VG_(gdb_printf
) ("missing or malformed leak kinds set\n");
6568 case 4: /* reachable */
6569 lcp
.show_leak_kinds
= MC_(all_Reachedness
)();
6571 case 5: /* possibleleak */
6573 = R2S(Possible
) | R2S(IndirectLeak
) | R2S(Unreached
);
6575 case 6: /* definiteleak */
6576 lcp
.show_leak_kinds
= R2S(Unreached
);
6578 case 7: { /* heuristics */
6579 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6581 || !VG_(parse_enum_set
)(MC_(parse_leak_heuristics_tokens
),
6585 VG_(gdb_printf
) ("missing or malformed heuristics set\n");
6590 case 8: /* increased */
6591 lcp
.deltamode
= LCD_Increased
; break;
6592 case 9: /* changed */
6593 lcp
.deltamode
= LCD_Changed
; break;
6595 lcp
.deltamode
= LCD_Any
; break;
6596 case 11: /* unlimited */
6597 lcp
.max_loss_records_output
= 999999999; break;
6598 case 12: { /* limited */
6600 const HChar
* endptr
;
6602 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6605 endptr
= "empty"; /* to report an error below */
6608 int_value
= VG_(strtoll10
) (wcmd
, &the_end
);
6611 if (*endptr
!= '\0')
6612 VG_(gdb_printf
) ("missing or malformed integer value\n");
6613 else if (int_value
> 0)
6614 lcp
.max_loss_records_output
= (UInt
) int_value
;
6616 VG_(gdb_printf
) ("max_loss_records_output must be >= 1,"
6617 " got %d\n", int_value
);
6625 MC_(detect_memory_leaks
)(tid
, &lcp
);
6626 if (xt_filename
!= NULL
)
6627 VG_(free
)(xt_filename
);
6631 case 3: { /* make_memory */
6634 Int kwdid
= VG_(keyword_id
)
6635 ("noaccess undefined defined Definedifaddressable",
6636 VG_(strtok_r
) (NULL
, " ", &ssaveptr
), kwd_report_all
);
6637 if (!VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
))
6642 case 0: MC_(make_mem_noaccess
) (address
, szB
); break;
6643 case 1: make_mem_undefined_w_tid_and_okind ( address
, szB
, tid
,
6644 MC_OKIND_USER
); break;
6645 case 2: MC_(make_mem_defined
) ( address
, szB
); break;
6646 case 3: make_mem_defined_if_addressable ( address
, szB
); break;;
6647 default: tl_assert(0);
6652 case 4: { /* check_memory */
6660 ExeContext
* origin_ec
;
6663 Int kwdid
= VG_(keyword_id
)
6664 ("addressable defined",
6665 VG_(strtok_r
) (NULL
, " ", &ssaveptr
), kwd_report_all
);
6666 if (!VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
))
6671 case 0: /* addressable */
6672 if (is_mem_addressable ( address
, szB
, &bad_addr
))
6673 VG_(printf
) ("Address %p len %lu addressable\n",
6674 (void *)address
, szB
);
6677 ("Address %p len %lu not addressable:\nbad address %p\n",
6678 (void *)address
, szB
, (void *) bad_addr
);
6679 // Describe this (probably live) address with current epoch
6680 MC_(pp_describe_addr
) (VG_(current_DiEpoch
)(), address
);
6682 case 1: /* defined */
6683 res
= is_mem_defined ( address
, szB
, &bad_addr
, &otag
);
6684 if (MC_AddrErr
== res
)
6686 ("Address %p len %lu not addressable:\nbad address %p\n",
6687 (void *)address
, szB
, (void *) bad_addr
);
6688 else if (MC_ValueErr
== res
) {
6691 case MC_OKIND_STACK
:
6692 src
= " was created by a stack allocation"; break;
6694 src
= " was created by a heap allocation"; break;
6696 src
= " was created by a client request"; break;
6697 case MC_OKIND_UNKNOWN
:
6699 default: tl_assert(0);
6702 ("Address %p len %lu not defined:\n"
6703 "Uninitialised value at %p%s\n",
6704 (void *)address
, szB
, (void *) bad_addr
, src
);
6706 if (VG_(is_plausible_ECU
)(ecu
)) {
6707 origin_ec
= VG_(get_ExeContext_from_ECU
)( ecu
);
6708 VG_(pp_ExeContext
)( origin_ec
);
6712 VG_(printf
) ("Address %p len %lu defined\n",
6713 (void *)address
, szB
);
6714 // Describe this (probably live) address with current epoch
6715 MC_(pp_describe_addr
) (VG_(current_DiEpoch
)(), address
);
6717 default: tl_assert(0);
6722 case 5: { /* block_list */
6725 UInt lr_nr_from
= 0;
6728 if (VG_(parse_slice
) (NULL
, &ssaveptr
, &lr_nr_from
, &lr_nr_to
)) {
6729 UInt limit_blocks
= 999999999;
6731 UInt heuristics
= 0;
6733 for (wl
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6735 wl
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
)) {
6736 switch (VG_(keyword_id
) ("unlimited limited heuristics ",
6737 wl
, kwd_report_all
)) {
6738 case -2: return True
;
6739 case -1: return True
;
6740 case 0: /* unlimited */
6741 limit_blocks
= 999999999; break;
6742 case 1: /* limited */
6743 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6745 VG_(gdb_printf
) ("missing integer value\n");
6748 int_value
= VG_(strtoll10
) (wcmd
, &the_end
);
6749 if (*the_end
!= '\0') {
6750 VG_(gdb_printf
) ("malformed integer value\n");
6753 if (int_value
<= 0) {
6754 VG_(gdb_printf
) ("max_blocks must be >= 1,"
6755 " got %d\n", int_value
);
6758 limit_blocks
= (UInt
) int_value
;
6760 case 2: /* heuristics */
6761 wcmd
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6763 || !VG_(parse_enum_set
)(MC_(parse_leak_heuristics_tokens
),
6767 VG_(gdb_printf
) ("missing or malformed heuristics set\n");
6775            /* subtract 1 from lr_nr_from/lr_nr_to as what is shown to the user
6776               is 1 more than the index in lr_array. */
6777 if (lr_nr_from
== 0 || ! MC_(print_block_list
) (lr_nr_from
-1,
6781 VG_(gdb_printf
) ("invalid loss record nr\n");
6786 case 6: { /* who_points_at */
6790 if (!VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
))
6792 if (address
== (Addr
) 0) {
6793 VG_(gdb_printf
) ("Cannot search who points at 0x0\n");
6796 MC_(who_points_at
) (address
, szB
);
6803 if (VG_(strtok_get_address_and_size
) (&address
, &szB
, &ssaveptr
)) {
6807 Int unaddressable
= 0;
6808 for (i
= 0; i
< szB
; i
++) {
6810 res
[bnr
] = mc_get_or_set_vbits_for_client
6811 (address
+i
, (Addr
) &vbits
[bnr
], 1,
6812 False
, /* get them */
6813 False
/* is client request */ );
6814            /* We are going to print the first vabits of a new line.
6815 Terminate the previous line if needed: prints a line with the
6816 address and the data. */
6820 gdb_xb (address
+ i
- 8, 8, res
);
6822 VG_(printf
) ("\t"); // To align VABITS with gdb_xb layout
6824 if (res
[bnr
] == 1) {
6825 VG_(printf
) ("\t %02x", vbits
[bnr
]);
6827 tl_assert(3 == res
[bnr
]);
6829 VG_(printf
) ("\t __");
6833 if (szB
% 8 == 0 && szB
> 0)
6834 gdb_xb (address
+ szB
- 8, 8, res
);
6836 gdb_xb (address
+ szB
- szB
% 8, szB
% 8, res
);
6837 if (unaddressable
) {
6839 ("Address %p len %lu has %d bytes unaddressable\n",
6840 (void *)address
, szB
, unaddressable
);
6846 case 8: { /* xtmemory */
6848 filename
= VG_(strtok_r
) (NULL
, " ", &ssaveptr
);
6849 MC_(xtmemory_report
)(filename
, False
);
6859 /*------------------------------------------------------------*/
6860 /*--- Client requests ---*/
6861 /*------------------------------------------------------------*/
6863 static Bool
mc_handle_client_request ( ThreadId tid
, UWord
* arg
, UWord
* ret
)
6868 if (!VG_IS_TOOL_USERREQ('M','C',arg
[0])
6869 && VG_USERREQ__MALLOCLIKE_BLOCK
!= arg
[0]
6870 && VG_USERREQ__RESIZEINPLACE_BLOCK
!= arg
[0]
6871 && VG_USERREQ__FREELIKE_BLOCK
!= arg
[0]
6872 && VG_USERREQ__CREATE_MEMPOOL
!= arg
[0]
6873 && VG_USERREQ__DESTROY_MEMPOOL
!= arg
[0]
6874 && VG_USERREQ__MEMPOOL_ALLOC
!= arg
[0]
6875 && VG_USERREQ__MEMPOOL_FREE
!= arg
[0]
6876 && VG_USERREQ__MEMPOOL_TRIM
!= arg
[0]
6877 && VG_USERREQ__MOVE_MEMPOOL
!= arg
[0]
6878 && VG_USERREQ__MEMPOOL_CHANGE
!= arg
[0]
6879 && VG_USERREQ__MEMPOOL_EXISTS
!= arg
[0]
6880 && VG_USERREQ__GDB_MONITOR_COMMAND
!= arg
[0]
6881 && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE
!= arg
[0]
6882 && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE
!= arg
[0])
6886 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE
: {
6887 Bool ok
= is_mem_addressable ( arg
[1], arg
[2], &bad_addr
);
6889 MC_(record_user_error
) ( tid
, bad_addr
, /*isAddrErr*/True
, 0 );
6890 *ret
= ok
? (UWord
)NULL
: bad_addr
;
6894 case VG_USERREQ__CHECK_MEM_IS_DEFINED
: {
6895 Bool errorV
= False
;
6898 Bool errorA
= False
;
6900 is_mem_defined_comprehensive(
6902 &errorV
, &bad_addrV
, &otagV
, &errorA
, &bad_addrA
6905 MC_(record_user_error
) ( tid
, bad_addrV
,
6906 /*isAddrErr*/False
, otagV
);
6909 MC_(record_user_error
) ( tid
, bad_addrA
,
6910 /*isAddrErr*/True
, 0 );
6912 /* Return the lower of the two erring addresses, if any. */
6914 if (errorV
&& !errorA
) {
6917 if (!errorV
&& errorA
) {
6920 if (errorV
&& errorA
) {
6921 *ret
= bad_addrV
< bad_addrA
? bad_addrV
: bad_addrA
;
6926 case VG_USERREQ__DO_LEAK_CHECK
: {
6927 LeakCheckParams lcp
;
6931 else if (arg
[1] == 1)
6932 lcp
.mode
= LC_Summary
;
6934 VG_(message
)(Vg_UserMsg
,
6935 "Warning: unknown memcheck leak search mode\n");
6939 lcp
.show_leak_kinds
= MC_(clo_show_leak_kinds
);
6940 lcp
.errors_for_leak_kinds
= MC_(clo_error_for_leak_kinds
);
6941 lcp
.heuristics
= MC_(clo_leak_check_heuristics
);
6944 lcp
.deltamode
= LCD_Any
;
6945 else if (arg
[2] == 1)
6946 lcp
.deltamode
= LCD_Increased
;
6947 else if (arg
[2] == 2)
6948 lcp
.deltamode
= LCD_Changed
;
6952 "Warning: unknown memcheck leak search deltamode\n");
6953 lcp
.deltamode
= LCD_Any
;
6955 lcp
.max_loss_records_output
= 999999999;
6956 lcp
.requested_by_monitor_command
= False
;
6957 lcp
.xt_filename
= NULL
;
6959 MC_(detect_memory_leaks
)(tid
, &lcp
);
6960 *ret
= 0; /* return value is meaningless */
6964 case VG_USERREQ__MAKE_MEM_NOACCESS
:
6965 MC_(make_mem_noaccess
) ( arg
[1], arg
[2] );
6969 case VG_USERREQ__MAKE_MEM_UNDEFINED
:
6970 make_mem_undefined_w_tid_and_okind ( arg
[1], arg
[2], tid
,
6975 case VG_USERREQ__MAKE_MEM_DEFINED
:
6976 MC_(make_mem_defined
) ( arg
[1], arg
[2] );
6980 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE
:
6981 make_mem_defined_if_addressable ( arg
[1], arg
[2] );
6985 case VG_USERREQ__CREATE_BLOCK
: /* describe a block */
6986 if (arg
[1] != 0 && arg
[2] != 0) {
6987 i
= alloc_client_block();
6988 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
6989 cgbs
[i
].start
= arg
[1];
6990 cgbs
[i
].size
= arg
[2];
6991 cgbs
[i
].desc
= VG_(strdup
)("mc.mhcr.1", (HChar
*)arg
[3]);
6992 cgbs
[i
].where
= VG_(record_ExeContext
) ( tid
, 0/*first_ip_delta*/ );
6998 case VG_USERREQ__DISCARD
: /* discard */
7000 || arg
[2] >= cgb_used
||
7001 (cgbs
[arg
[2]].start
== 0 && cgbs
[arg
[2]].size
== 0)) {
7004 tl_assert(arg
[2] >= 0 && arg
[2] < cgb_used
);
7005 cgbs
[arg
[2]].start
= cgbs
[arg
[2]].size
= 0;
7006 VG_(free
)(cgbs
[arg
[2]].desc
);
7012 case VG_USERREQ__GET_VBITS
:
7013 *ret
= mc_get_or_set_vbits_for_client
7014 ( arg
[1], arg
[2], arg
[3],
7015 False
/* get them */,
7016 True
/* is client request */ );
7019 case VG_USERREQ__SET_VBITS
:
7020 *ret
= mc_get_or_set_vbits_for_client
7021 ( arg
[1], arg
[2], arg
[3],
7022 True
/* set them */,
7023 True
/* is client request */ );
7026 case VG_USERREQ__COUNT_LEAKS
: { /* count leaked bytes */
7027 UWord
** argp
= (UWord
**)arg
;
7028 // MC_(bytes_leaked) et al were set by the last leak check (or zero
7029 // if no prior leak checks performed).
7030 *argp
[1] = MC_(bytes_leaked
) + MC_(bytes_indirect
);
7031 *argp
[2] = MC_(bytes_dubious
);
7032 *argp
[3] = MC_(bytes_reachable
);
7033 *argp
[4] = MC_(bytes_suppressed
);
7034 // there is no argp[5]
7035 //*argp[5] = MC_(bytes_indirect);
7036 // XXX need to make *argp[1-4] defined; currently done in the
7037 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
7041 case VG_USERREQ__COUNT_LEAK_BLOCKS
: { /* count leaked blocks */
7042 UWord
** argp
= (UWord
**)arg
;
7043 // MC_(blocks_leaked) et al were set by the last leak check (or zero
7044 // if no prior leak checks performed).
7045 *argp
[1] = MC_(blocks_leaked
) + MC_(blocks_indirect
);
7046 *argp
[2] = MC_(blocks_dubious
);
7047 *argp
[3] = MC_(blocks_reachable
);
7048 *argp
[4] = MC_(blocks_suppressed
);
7049 // there is no argp[5]
7050 //*argp[5] = MC_(blocks_indirect);
7051 // XXX need to make *argp[1-4] defined; currently done in the
7052 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
7056 case VG_USERREQ__MALLOCLIKE_BLOCK
: {
7057 Addr p
= (Addr
)arg
[1];
7058 SizeT sizeB
= arg
[2];
7060 Bool is_zeroed
= (Bool
)arg
[4];
7062 MC_(new_block
) ( tid
, p
, sizeB
, /*ignored*/0, is_zeroed
,
7063 MC_AllocCustom
, MC_(malloc_list
) );
7065 MC_(make_mem_noaccess
) ( p
- rzB
, rzB
);
7066 MC_(make_mem_noaccess
) ( p
+ sizeB
, rzB
);
7070 case VG_USERREQ__RESIZEINPLACE_BLOCK
: {
7071 Addr p
= (Addr
)arg
[1];
7072 SizeT oldSizeB
= arg
[2];
7073 SizeT newSizeB
= arg
[3];
7076 MC_(handle_resizeInPlace
) ( tid
, p
, oldSizeB
, newSizeB
, rzB
);
7079 case VG_USERREQ__FREELIKE_BLOCK
: {
7080 Addr p
= (Addr
)arg
[1];
7083 MC_(handle_free
) ( tid
, p
, rzB
, MC_AllocCustom
);
7087 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR
: {
7088 HChar
* s
= (HChar
*)arg
[1];
7089 Addr dst
= (Addr
) arg
[2];
7090 Addr src
= (Addr
) arg
[3];
7091 SizeT len
= (SizeT
)arg
[4];
7092 MC_(record_overlap_error
)(tid
, s
, src
, dst
, len
);
7096 case VG_USERREQ__CREATE_MEMPOOL
: {
7097 Addr pool
= (Addr
)arg
[1];
7099 Bool is_zeroed
= (Bool
)arg
[3];
7100 UInt flags
= arg
[4];
7102 // The create_mempool function does not know these mempool flags,
7103 // pass as booleans.
7104 MC_(create_mempool
) ( pool
, rzB
, is_zeroed
,
7105 (flags
& VALGRIND_MEMPOOL_AUTO_FREE
),
7106 (flags
& VALGRIND_MEMPOOL_METAPOOL
) );
7110 case VG_USERREQ__DESTROY_MEMPOOL
: {
7111 Addr pool
= (Addr
)arg
[1];
7113 MC_(destroy_mempool
) ( pool
);
7117 case VG_USERREQ__MEMPOOL_ALLOC
: {
7118 Addr pool
= (Addr
)arg
[1];
7119 Addr addr
= (Addr
)arg
[2];
7122 MC_(mempool_alloc
) ( tid
, pool
, addr
, size
);
7126 case VG_USERREQ__MEMPOOL_FREE
: {
7127 Addr pool
= (Addr
)arg
[1];
7128 Addr addr
= (Addr
)arg
[2];
7130 MC_(mempool_free
) ( pool
, addr
);
7134 case VG_USERREQ__MEMPOOL_TRIM
: {
7135 Addr pool
= (Addr
)arg
[1];
7136 Addr addr
= (Addr
)arg
[2];
7139 MC_(mempool_trim
) ( pool
, addr
, size
);
7143 case VG_USERREQ__MOVE_MEMPOOL
: {
7144 Addr poolA
= (Addr
)arg
[1];
7145 Addr poolB
= (Addr
)arg
[2];
7147 MC_(move_mempool
) ( poolA
, poolB
);
7151 case VG_USERREQ__MEMPOOL_CHANGE
: {
7152 Addr pool
= (Addr
)arg
[1];
7153 Addr addrA
= (Addr
)arg
[2];
7154 Addr addrB
= (Addr
)arg
[3];
7157 MC_(mempool_change
) ( pool
, addrA
, addrB
, size
);
7161 case VG_USERREQ__MEMPOOL_EXISTS
: {
7162 Addr pool
= (Addr
)arg
[1];
7164 *ret
= (UWord
) MC_(mempool_exists
) ( pool
);
7168 case VG_USERREQ__GDB_MONITOR_COMMAND
: {
7169 Bool handled
= handle_gdb_monitor_command (tid
, (HChar
*)arg
[1]);
7177 case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE
:
7178 case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE
: {
7180 = arg
[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE
;
7182 = modify_ignore_ranges(addRange
, arg
[1], arg
[2]);
7190 "Warning: unknown memcheck client request code %llx\n",
7199 /*------------------------------------------------------------*/
7200 /*--- Crude profiling machinery. ---*/
7201 /*------------------------------------------------------------*/
7203 // We track a number of interesting events (using PROF_EVENT)
7204 // if MC_PROFILE_MEMORY is defined.
7206 #ifdef MC_PROFILE_MEMORY
7208 ULong
MC_(event_ctr
)[MCPE_LAST
];
7210 /* Event counter names. Use the name of the function that increases the
7211    event counter. Drop any MC_() and mc_ prefixes. */
7212 static const HChar
* MC_(event_ctr_name
)[MCPE_LAST
] = {
7213 [MCPE_LOADVN_SLOW
] = "LOADVn_slow",
7214 [MCPE_LOADVN_SLOW_LOOP
] = "LOADVn_slow_loop",
7215 [MCPE_STOREVN_SLOW
] = "STOREVn_slow",
7216 [MCPE_STOREVN_SLOW_LOOP
] = "STOREVn_slow(loop)",
7217 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED
] = "make_aligned_word32_undefined",
7218 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW
] =
7219 "make_aligned_word32_undefined_slow",
7220 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED
] = "make_aligned_word64_undefined",
7221 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW
] =
7222 "make_aligned_word64_undefined_slow",
7223 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS
] = "make_aligned_word32_noaccess",
7224 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW
] =
7225 "make_aligned_word32_noaccess_slow",
7226 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS
] = "make_aligned_word64_noaccess",
7227 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW
] =
7228 "make_aligned_word64_noaccess_slow",
7229 [MCPE_MAKE_MEM_NOACCESS
] = "make_mem_noaccess",
7230 [MCPE_MAKE_MEM_UNDEFINED
] = "make_mem_undefined",
7231 [MCPE_MAKE_MEM_UNDEFINED_W_OTAG
] = "make_mem_undefined_w_otag",
7232 [MCPE_MAKE_MEM_DEFINED
] = "make_mem_defined",
7233 [MCPE_CHEAP_SANITY_CHECK
] = "cheap_sanity_check",
7234 [MCPE_EXPENSIVE_SANITY_CHECK
] = "expensive_sanity_check",
7235 [MCPE_COPY_ADDRESS_RANGE_STATE
] = "copy_address_range_state",
7236 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1
] = "copy_address_range_state(loop1)",
7237 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2
] = "copy_address_range_state(loop2)",
7238 [MCPE_CHECK_MEM_IS_NOACCESS
] = "check_mem_is_noaccess",
7239 [MCPE_CHECK_MEM_IS_NOACCESS_LOOP
] = "check_mem_is_noaccess(loop)",
7240 [MCPE_IS_MEM_ADDRESSABLE
] = "is_mem_addressable",
7241 [MCPE_IS_MEM_ADDRESSABLE_LOOP
] = "is_mem_addressable(loop)",
7242 [MCPE_IS_MEM_DEFINED
] = "is_mem_defined",
7243 [MCPE_IS_MEM_DEFINED_LOOP
] = "is_mem_defined(loop)",
7244 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE
] = "is_mem_defined_comprehensive",
7245 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP
] =
7246 "is_mem_defined_comprehensive(loop)",
7247 [MCPE_IS_DEFINED_ASCIIZ
] = "is_defined_asciiz",
7248 [MCPE_IS_DEFINED_ASCIIZ_LOOP
] = "is_defined_asciiz(loop)",
7249 [MCPE_FIND_CHUNK_FOR_OLD
] = "find_chunk_for_OLD",
7250 [MCPE_FIND_CHUNK_FOR_OLD_LOOP
] = "find_chunk_for_OLD(loop)",
7251 [MCPE_SET_ADDRESS_RANGE_PERMS
] = "set_address_range_perms",
7252 [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP
] =
7253 "set_address_range_perms(single-secmap)",
7254 [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP
] =
7255 "set_address_range_perms(startof-secmap)",
7256 [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS
] =
7257 "set_address_range_perms(multiple-secmaps)",
7258 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1
] =
7259 "set_address_range_perms(dist-sm1)",
7260 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2
] =
7261 "set_address_range_perms(dist-sm2)",
7262 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK
] =
7263 "set_address_range_perms(dist-sm1-quick)",
7264 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK
] =
7265 "set_address_range_perms(dist-sm2-quick)",
7266 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A
] = "set_address_range_perms(loop1a)",
7267 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B
] = "set_address_range_perms(loop1b)",
7268 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C
] = "set_address_range_perms(loop1c)",
7269 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A
] = "set_address_range_perms(loop8a)",
7270 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B
] = "set_address_range_perms(loop8b)",
7271 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K
] = "set_address_range_perms(loop64K)",
7272 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM
] =
7273 "set_address_range_perms(loop64K-free-dist-sm)",
7274 [MCPE_LOADV_128_OR_256_SLOW_LOOP
] = "LOADV_128_or_256_slow(loop)",
7275 [MCPE_LOADV_128_OR_256
] = "LOADV_128_or_256",
7276 [MCPE_LOADV_128_OR_256_SLOW1
] = "LOADV_128_or_256-slow1",
7277 [MCPE_LOADV_128_OR_256_SLOW2
] = "LOADV_128_or_256-slow2",
7278 [MCPE_LOADV64
] = "LOADV64",
7279 [MCPE_LOADV64_SLOW1
] = "LOADV64-slow1",
7280 [MCPE_LOADV64_SLOW2
] = "LOADV64-slow2",
7281 [MCPE_STOREV64
] = "STOREV64",
7282 [MCPE_STOREV64_SLOW1
] = "STOREV64-slow1",
7283 [MCPE_STOREV64_SLOW2
] = "STOREV64-slow2",
7284 [MCPE_STOREV64_SLOW3
] = "STOREV64-slow3",
7285 [MCPE_STOREV64_SLOW4
] = "STOREV64-slow4",
7286 [MCPE_LOADV32
] = "LOADV32",
7287 [MCPE_LOADV32_SLOW1
] = "LOADV32-slow1",
7288 [MCPE_LOADV32_SLOW2
] = "LOADV32-slow2",
7289 [MCPE_STOREV32
] = "STOREV32",
7290 [MCPE_STOREV32_SLOW1
] = "STOREV32-slow1",
7291 [MCPE_STOREV32_SLOW2
] = "STOREV32-slow2",
7292 [MCPE_STOREV32_SLOW3
] = "STOREV32-slow3",
7293 [MCPE_STOREV32_SLOW4
] = "STOREV32-slow4",
7294 [MCPE_LOADV16
] = "LOADV16",
7295 [MCPE_LOADV16_SLOW1
] = "LOADV16-slow1",
7296 [MCPE_LOADV16_SLOW2
] = "LOADV16-slow2",
7297 [MCPE_STOREV16
] = "STOREV16",
7298 [MCPE_STOREV16_SLOW1
] = "STOREV16-slow1",
7299 [MCPE_STOREV16_SLOW2
] = "STOREV16-slow2",
7300 [MCPE_STOREV16_SLOW3
] = "STOREV16-slow3",
7301 [MCPE_STOREV16_SLOW4
] = "STOREV16-slow4",
7302 [MCPE_LOADV8
] = "LOADV8",
7303 [MCPE_LOADV8_SLOW1
] = "LOADV8-slow1",
7304 [MCPE_LOADV8_SLOW2
] = "LOADV8-slow2",
7305 [MCPE_STOREV8
] = "STOREV8",
7306 [MCPE_STOREV8_SLOW1
] = "STOREV8-slow1",
7307 [MCPE_STOREV8_SLOW2
] = "STOREV8-slow2",
7308 [MCPE_STOREV8_SLOW3
] = "STOREV8-slow3",
7309 [MCPE_STOREV8_SLOW4
] = "STOREV8-slow4",
7310 [MCPE_NEW_MEM_STACK_4
] = "new_mem_stack_4",
7311 [MCPE_NEW_MEM_STACK_8
] = "new_mem_stack_8",
7312 [MCPE_NEW_MEM_STACK_12
] = "new_mem_stack_12",
7313 [MCPE_NEW_MEM_STACK_16
] = "new_mem_stack_16",
7314 [MCPE_NEW_MEM_STACK_32
] = "new_mem_stack_32",
7315 [MCPE_NEW_MEM_STACK_112
] = "new_mem_stack_112",
7316 [MCPE_NEW_MEM_STACK_128
] = "new_mem_stack_128",
7317 [MCPE_NEW_MEM_STACK_144
] = "new_mem_stack_144",
7318 [MCPE_NEW_MEM_STACK_160
] = "new_mem_stack_160",
7319 [MCPE_DIE_MEM_STACK_4
] = "die_mem_stack_4",
7320 [MCPE_DIE_MEM_STACK_8
] = "die_mem_stack_8",
7321 [MCPE_DIE_MEM_STACK_12
] = "die_mem_stack_12",
7322 [MCPE_DIE_MEM_STACK_16
] = "die_mem_stack_16",
7323 [MCPE_DIE_MEM_STACK_32
] = "die_mem_stack_32",
7324 [MCPE_DIE_MEM_STACK_112
] = "die_mem_stack_112",
7325 [MCPE_DIE_MEM_STACK_128
] = "die_mem_stack_128",
7326 [MCPE_DIE_MEM_STACK_144
] = "die_mem_stack_144",
7327 [MCPE_DIE_MEM_STACK_160
] = "die_mem_stack_160",
7328 [MCPE_NEW_MEM_STACK
] = "new_mem_stack",
7329 [MCPE_DIE_MEM_STACK
] = "die_mem_stack",
7330 [MCPE_MAKE_STACK_UNINIT_W_O
] = "MAKE_STACK_UNINIT_w_o",
7331 [MCPE_MAKE_STACK_UNINIT_NO_O
] = "MAKE_STACK_UNINIT_no_o",
7332 [MCPE_MAKE_STACK_UNINIT_128_NO_O
] = "MAKE_STACK_UNINIT_128_no_o",
7333 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16
]
7334 = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
7335 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8
]
7336 = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
7337 [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE
]
7338 = "MAKE_STACK_UNINIT_128_no_o_slowcase",
7341 static void init_prof_mem ( void )
7343 Int i
, name_count
= 0;
7345 for (i
= 0; i
< MCPE_LAST
; i
++) {
7346 MC_(event_ctr
)[i
] = 0;
7347 if (MC_(event_ctr_name
)[i
] != NULL
)
7351 /* Make sure every profiling event has a name */
7352 tl_assert(name_count
== MCPE_LAST
);
7355 static void done_prof_mem ( void )
7358 Bool spaced
= False
;
7359 for (i
= n
= 0; i
< MCPE_LAST
; i
++) {
7360 if (!spaced
&& (n
% 10) == 0) {
7364 if (MC_(event_ctr
)[i
] > 0) {
7367 VG_(printf
)( "prof mem event %3d: %11llu %s\n",
7368 i
, MC_(event_ctr
)[i
],
7369 MC_(event_ctr_name
)[i
]);
7376 static void init_prof_mem ( void ) { }
7377 static void done_prof_mem ( void ) { }
7382 /*------------------------------------------------------------*/
7383 /*--- Origin tracking stuff ---*/
7384 /*------------------------------------------------------------*/
7386 /*--------------------------------------------*/
7387 /*--- Origin tracking: load handlers ---*/
7388 /*--------------------------------------------*/
7390 static INLINE UInt
merge_origins ( UInt or1
, UInt or2
) {
7391 return or1
> or2
? or1
: or2
;
7394 UWord
VG_REGPARM(1) MC_(helperc_b_load1
)( Addr a
) {
7397 UWord lineoff
= oc_line_offset(a
);
7398 UWord byteoff
= a
& 3; /* 0, 1, 2 or 3 */
7400 if (OC_ENABLE_ASSERTIONS
) {
7401 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
7404 line
= find_OCacheLine( a
);
7406 descr
= line
->descr
[lineoff
];
7407 if (OC_ENABLE_ASSERTIONS
) {
7408 tl_assert(descr
< 0x10);
7411 if (LIKELY(0 == (descr
& (1 << byteoff
)))) {
7414 return line
->w32
[lineoff
];
7418 UWord
VG_REGPARM(1) MC_(helperc_b_load2
)( Addr a
) {
7421 UWord lineoff
, byteoff
;
7423 if (UNLIKELY(a
& 1)) {
7424 /* Handle misaligned case, slowly. */
7425 UInt oLo
= (UInt
)MC_(helperc_b_load1
)( a
+ 0 );
7426 UInt oHi
= (UInt
)MC_(helperc_b_load1
)( a
+ 1 );
7427 return merge_origins(oLo
, oHi
);
7430 lineoff
= oc_line_offset(a
);
7431 byteoff
= a
& 3; /* 0 or 2 */
7433 if (OC_ENABLE_ASSERTIONS
) {
7434 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
7436 line
= find_OCacheLine( a
);
7438 descr
= line
->descr
[lineoff
];
7439 if (OC_ENABLE_ASSERTIONS
) {
7440 tl_assert(descr
< 0x10);
7443 if (LIKELY(0 == (descr
& (3 << byteoff
)))) {
7446 return line
->w32
[lineoff
];
7450 UWord
VG_REGPARM(1) MC_(helperc_b_load4
)( Addr a
) {
7455 if (UNLIKELY(a
& 3)) {
7456 /* Handle misaligned case, slowly. */
7457 UInt oLo
= (UInt
)MC_(helperc_b_load2
)( a
+ 0 );
7458 UInt oHi
= (UInt
)MC_(helperc_b_load2
)( a
+ 2 );
7459 return merge_origins(oLo
, oHi
);
7462 lineoff
= oc_line_offset(a
);
7463 if (OC_ENABLE_ASSERTIONS
) {
7464 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
7467 line
= find_OCacheLine( a
);
7469 descr
= line
->descr
[lineoff
];
7470 if (OC_ENABLE_ASSERTIONS
) {
7471 tl_assert(descr
< 0x10);
7474 if (LIKELY(0 == descr
)) {
7477 return line
->w32
[lineoff
];
7481 UWord
VG_REGPARM(1) MC_(helperc_b_load8
)( Addr a
) {
7483 UChar descrLo
, descrHi
, descr
;
7486 if (UNLIKELY(a
& 7)) {
7487 /* Handle misaligned case, slowly. */
7488 UInt oLo
= (UInt
)MC_(helperc_b_load4
)( a
+ 0 );
7489 UInt oHi
= (UInt
)MC_(helperc_b_load4
)( a
+ 4 );
7490 return merge_origins(oLo
, oHi
);
7493 lineoff
= oc_line_offset(a
);
7494 if (OC_ENABLE_ASSERTIONS
) {
7495 tl_assert(lineoff
== (lineoff
& 6)); /*0,2,4,6*//*since 8-aligned*/
7498 line
= find_OCacheLine( a
);
7500 descrLo
= line
->descr
[lineoff
+ 0];
7501 descrHi
= line
->descr
[lineoff
+ 1];
7502 descr
= descrLo
| descrHi
;
7503 if (OC_ENABLE_ASSERTIONS
) {
7504 tl_assert(descr
< 0x10);
7507 if (LIKELY(0 == descr
)) {
7508 return 0; /* both 32-bit chunks are defined */
7510 UInt oLo
= descrLo
== 0 ? 0 : line
->w32
[lineoff
+ 0];
7511 UInt oHi
= descrHi
== 0 ? 0 : line
->w32
[lineoff
+ 1];
7512 return merge_origins(oLo
, oHi
);
7516 UWord
VG_REGPARM(1) MC_(helperc_b_load16
)( Addr a
) {
7517 UInt oLo
= (UInt
)MC_(helperc_b_load8
)( a
+ 0 );
7518 UInt oHi
= (UInt
)MC_(helperc_b_load8
)( a
+ 8 );
7519 UInt oBoth
= merge_origins(oLo
, oHi
);
7520 return (UWord
)oBoth
;
7523 UWord
VG_REGPARM(1) MC_(helperc_b_load32
)( Addr a
) {
7524 UInt oQ0
= (UInt
)MC_(helperc_b_load8
)( a
+ 0 );
7525 UInt oQ1
= (UInt
)MC_(helperc_b_load8
)( a
+ 8 );
7526 UInt oQ2
= (UInt
)MC_(helperc_b_load8
)( a
+ 16 );
7527 UInt oQ3
= (UInt
)MC_(helperc_b_load8
)( a
+ 24 );
7528 UInt oAll
= merge_origins(merge_origins(oQ0
, oQ1
),
7529 merge_origins(oQ2
, oQ3
));
7534 /*--------------------------------------------*/
7535 /*--- Origin tracking: store handlers ---*/
7536 /*--------------------------------------------*/
7538 void VG_REGPARM(2) MC_(helperc_b_store1
)( Addr a
, UWord d32
) {
7540 UWord lineoff
= oc_line_offset(a
);
7541 UWord byteoff
= a
& 3; /* 0, 1, 2 or 3 */
7543 if (OC_ENABLE_ASSERTIONS
) {
7544 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
7547 line
= find_OCacheLine( a
);
7550 line
->descr
[lineoff
] &= ~(1 << byteoff
);
7552 line
->descr
[lineoff
] |= (1 << byteoff
);
7553 line
->w32
[lineoff
] = d32
;
7557 void VG_REGPARM(2) MC_(helperc_b_store2
)( Addr a
, UWord d32
) {
7559 UWord lineoff
, byteoff
;
7561 if (UNLIKELY(a
& 1)) {
7562 /* Handle misaligned case, slowly. */
7563 MC_(helperc_b_store1
)( a
+ 0, d32
);
7564 MC_(helperc_b_store1
)( a
+ 1, d32
);
7568 lineoff
= oc_line_offset(a
);
7569 byteoff
= a
& 3; /* 0 or 2 */
7571 if (OC_ENABLE_ASSERTIONS
) {
7572 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
7575 line
= find_OCacheLine( a
);
7578 line
->descr
[lineoff
] &= ~(3 << byteoff
);
7580 line
->descr
[lineoff
] |= (3 << byteoff
);
7581 line
->w32
[lineoff
] = d32
;
7585 void VG_REGPARM(2) MC_(helperc_b_store4
)( Addr a
, UWord d32
) {
7589 if (UNLIKELY(a
& 3)) {
7590 /* Handle misaligned case, slowly. */
7591 MC_(helperc_b_store2
)( a
+ 0, d32
);
7592 MC_(helperc_b_store2
)( a
+ 2, d32
);
7596 lineoff
= oc_line_offset(a
);
7597 if (OC_ENABLE_ASSERTIONS
) {
7598 tl_assert(lineoff
>= 0 && lineoff
< OC_W32S_PER_LINE
);
7601 line
= find_OCacheLine( a
);
7604 line
->descr
[lineoff
] = 0;
7606 line
->descr
[lineoff
] = 0xF;
7607 line
->w32
[lineoff
] = d32
;
7611 void VG_REGPARM(2) MC_(helperc_b_store8
)( Addr a
, UWord d32
) {
7615 if (UNLIKELY(a
& 7)) {
7616 /* Handle misaligned case, slowly. */
7617 MC_(helperc_b_store4
)( a
+ 0, d32
);
7618 MC_(helperc_b_store4
)( a
+ 4, d32
);
7622 lineoff
= oc_line_offset(a
);
7623 if (OC_ENABLE_ASSERTIONS
) {
7624 tl_assert(lineoff
== (lineoff
& 6)); /*0,2,4,6*//*since 8-aligned*/
7627 line
= find_OCacheLine( a
);
7630 line
->descr
[lineoff
+ 0] = 0;
7631 line
->descr
[lineoff
+ 1] = 0;
7633 line
->descr
[lineoff
+ 0] = 0xF;
7634 line
->descr
[lineoff
+ 1] = 0xF;
7635 line
->w32
[lineoff
+ 0] = d32
;
7636 line
->w32
[lineoff
+ 1] = d32
;
7640 void VG_REGPARM(2) MC_(helperc_b_store16
)( Addr a
, UWord d32
) {
7641 MC_(helperc_b_store8
)( a
+ 0, d32
);
7642 MC_(helperc_b_store8
)( a
+ 8, d32
);
7645 void VG_REGPARM(2) MC_(helperc_b_store32
)( Addr a
, UWord d32
) {
7646 MC_(helperc_b_store8
)( a
+ 0, d32
);
7647 MC_(helperc_b_store8
)( a
+ 8, d32
);
7648 MC_(helperc_b_store8
)( a
+ 16, d32
);
7649 MC_(helperc_b_store8
)( a
+ 24, d32
);
7653 /*--------------------------------------------*/
7654 /*--- Origin tracking: sarp handlers ---*/
7655 /*--------------------------------------------*/
7657 __attribute__((noinline
))
7658 static void ocache_sarp_Set_Origins ( Addr a
, UWord len
, UInt otag
) {
7659 if ((a
& 1) && len
>= 1) {
7660 MC_(helperc_b_store1
)( a
, otag
);
7664 if ((a
& 2) && len
>= 2) {
7665 MC_(helperc_b_store2
)( a
, otag
);
7670 tl_assert(0 == (a
& 3));
7672 MC_(helperc_b_store4
)( a
, otag
);
7677 MC_(helperc_b_store2
)( a
, otag
);
7682 MC_(helperc_b_store1
)( a
, otag
);
7686 tl_assert(len
== 0);
7689 __attribute__((noinline
))
7690 static void ocache_sarp_Clear_Origins ( Addr a
, UWord len
) {
7691 if ((a
& 1) && len
>= 1) {
7692 MC_(helperc_b_store1
)( a
, 0 );
7696 if ((a
& 2) && len
>= 2) {
7697 MC_(helperc_b_store2
)( a
, 0 );
7702 tl_assert(0 == (a
& 3));
7704 MC_(helperc_b_store4
)( a
, 0 );
7709 MC_(helperc_b_store2
)( a
, 0 );
7714 MC_(helperc_b_store1
)( a
, 0 );
7718 tl_assert(len
== 0);
7722 /*------------------------------------------------------------*/
7723 /*--- Setup and finalisation ---*/
7724 /*------------------------------------------------------------*/
7726 static void mc_post_clo_init ( void )
7728 /* If we've been asked to emit XML, mash around various other
7729 options so as to constrain the output somewhat. */
7731 /* Extract as much info as possible from the leak checker. */
7732 MC_(clo_leak_check
) = LC_Full
;
7735 if (MC_(clo_freelist_big_blocks
) >= MC_(clo_freelist_vol
)
7736 && VG_(clo_verbosity
) == 1 && !VG_(clo_xml
)) {
7737 VG_(message
)(Vg_UserMsg
,
7738 "Warning: --freelist-big-blocks value %lld has no effect\n"
7739 "as it is >= to --freelist-vol value %lld\n",
7740 MC_(clo_freelist_big_blocks
),
7741 MC_(clo_freelist_vol
));
7744 if (MC_(clo_workaround_gcc296_bugs
)
7745 && VG_(clo_verbosity
) == 1 && !VG_(clo_xml
)) {
7747 "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
7748 "Warning: Instead use: --ignore-range-below-sp=1024-1\n"
7753 tl_assert( MC_(clo_mc_level
) >= 1 && MC_(clo_mc_level
) <= 3 );
7755 if (MC_(clo_mc_level
) == 3) {
7756 /* We're doing origin tracking. */
7757 # ifdef PERF_FAST_STACK
7758 VG_(track_new_mem_stack_4_w_ECU
) ( mc_new_mem_stack_4_w_ECU
);
7759 VG_(track_new_mem_stack_8_w_ECU
) ( mc_new_mem_stack_8_w_ECU
);
7760 VG_(track_new_mem_stack_12_w_ECU
) ( mc_new_mem_stack_12_w_ECU
);
7761 VG_(track_new_mem_stack_16_w_ECU
) ( mc_new_mem_stack_16_w_ECU
);
7762 VG_(track_new_mem_stack_32_w_ECU
) ( mc_new_mem_stack_32_w_ECU
);
7763 VG_(track_new_mem_stack_112_w_ECU
) ( mc_new_mem_stack_112_w_ECU
);
7764 VG_(track_new_mem_stack_128_w_ECU
) ( mc_new_mem_stack_128_w_ECU
);
7765 VG_(track_new_mem_stack_144_w_ECU
) ( mc_new_mem_stack_144_w_ECU
);
7766 VG_(track_new_mem_stack_160_w_ECU
) ( mc_new_mem_stack_160_w_ECU
);
7768 VG_(track_new_mem_stack_w_ECU
) ( mc_new_mem_stack_w_ECU
);
7769 VG_(track_new_mem_stack_signal
) ( mc_new_mem_w_tid_make_ECU
);
7771 /* Not doing origin tracking */
7772 # ifdef PERF_FAST_STACK
7773 VG_(track_new_mem_stack_4
) ( mc_new_mem_stack_4
);
7774 VG_(track_new_mem_stack_8
) ( mc_new_mem_stack_8
);
7775 VG_(track_new_mem_stack_12
) ( mc_new_mem_stack_12
);
7776 VG_(track_new_mem_stack_16
) ( mc_new_mem_stack_16
);
7777 VG_(track_new_mem_stack_32
) ( mc_new_mem_stack_32
);
7778 VG_(track_new_mem_stack_112
) ( mc_new_mem_stack_112
);
7779 VG_(track_new_mem_stack_128
) ( mc_new_mem_stack_128
);
7780 VG_(track_new_mem_stack_144
) ( mc_new_mem_stack_144
);
7781 VG_(track_new_mem_stack_160
) ( mc_new_mem_stack_160
);
7783 VG_(track_new_mem_stack
) ( mc_new_mem_stack
);
7784 VG_(track_new_mem_stack_signal
) ( mc_new_mem_w_tid_no_ECU
);
7787 // We assume that brk()/sbrk() does not initialise new memory. Is this
7788 // accurate? John Reiser says:
7790 // 0) sbrk() can *decrease* process address space. No zero fill is done
7791 // for a decrease, not even the fragment on the high end of the last page
7792 // that is beyond the new highest address. For maximum safety and
7793 // portability, then the bytes in the last page that reside above [the
7794 // new] sbrk(0) should be considered to be uninitialized, but in practice
7795 // it is exceedingly likely that they will retain their previous
7798 // 1) If an increase is large enough to require new whole pages, then
7799 // those new whole pages (like all new pages) are zero-filled by the
7800 // operating system. So if sbrk(0) already is page aligned, then
7801 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
7803 // 2) Any increase that lies within an existing allocated page is not
7804 // changed. So if (x = sbrk(0)) is not page aligned, then
7805 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
7806 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
7807 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
7808 // of them come along for the ride because the operating system deals
7809 // only in whole pages. Again, for maximum safety and portability, then
7810 // anything that lives above [the new] sbrk(0) should be considered
7811 // uninitialized, but in practice will retain previous contents [zero in
7816 // A key property of sbrk/brk is that new whole pages that are supplied
7817 // by the operating system *do* get initialized to zero.
7819 // As for the portability of all this:
7821 // sbrk and brk are not POSIX. However, any system that is a derivative
7822 // of *nix has sbrk and brk because there are too many software (such as
7823 // the Bourne shell) which rely on the traditional memory map (.text,
7824 // .data+.bss, stack) and the existence of sbrk/brk.
7826 // So we should arguably observe all this. However:
7827 // - The current inaccuracy has caused maybe one complaint in seven years(?)
7828 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
7829 // doubt most programmers know the above information.
7830 // So I'm not terribly unhappy with marking it as undefined. --njn.
7832 // [More: I think most of what John said only applies to sbrk(). It seems
7833 // that brk() always deals in whole pages. And since this event deals
7834 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
7835 // just mark all memory it allocates as defined.]
7837 # if !defined(VGO_solaris)
7838 if (MC_(clo_mc_level
) == 3)
7839 VG_(track_new_mem_brk
) ( mc_new_mem_w_tid_make_ECU
);
7841 VG_(track_new_mem_brk
) ( mc_new_mem_w_tid_no_ECU
);
7843 // On Solaris, brk memory has to be marked as defined, otherwise we get
7844 // many false positives.
7845 VG_(track_new_mem_brk
) ( make_mem_defined_w_tid
);
7848 /* This origin tracking cache is huge (~100M), so only initialise
7850 if (MC_(clo_mc_level
) >= 3) {
7852 tl_assert(ocacheL1
!= NULL
);
7853 tl_assert(ocacheL2
!= NULL
);
7855 tl_assert(ocacheL1
== NULL
);
7856 tl_assert(ocacheL2
== NULL
);
7859 MC_(chunk_poolalloc
) = VG_(newPA
)
7860 (sizeof(MC_Chunk
) + MC_(n_where_pointers
)() * sizeof(ExeContext
*),
7863 "mc.cMC.1 (MC_Chunk pools)",
7866 /* Do not check definedness of guest state if --undef-value-errors=no */
7867 if (MC_(clo_mc_level
) >= 2)
7868 VG_(track_pre_reg_read
) ( mc_pre_reg_read
);
7870 if (VG_(clo_xtree_memory
) == Vg_XTMemory_Full
) {
7871 if (MC_(clo_keep_stacktraces
) == KS_none
7872 || MC_(clo_keep_stacktraces
) == KS_free
)
7873 VG_(fmsg_bad_option
)("--keep-stacktraces",
7874 "To use --xtree-memory=full, you must"
7875 " keep at least the alloc stacktrace\n");
7876 // Activate full xtree memory profiling.
7877 VG_(XTMemory_Full_init
)(VG_(XT_filter_1top_and_maybe_below_main
));
7882 static void print_SM_info(const HChar
* type
, Int n_SMs
)
7884 VG_(message
)(Vg_DebugMsg
,
7885 " memcheck: SMs: %s = %d (%luk, %luM)\n",
7888 n_SMs
* sizeof(SecMap
) / 1024UL,
7889 n_SMs
* sizeof(SecMap
) / (1024 * 1024UL) );
7892 static void mc_print_stats (void)
7894 SizeT max_secVBit_szB
, max_SMs_szB
, max_shmem_szB
;
7896 VG_(message
)(Vg_DebugMsg
, " memcheck: freelist: vol %lld length %lld\n",
7897 VG_(free_queue_volume
), VG_(free_queue_length
));
7898 VG_(message
)(Vg_DebugMsg
,
7899 " memcheck: sanity checks: %d cheap, %d expensive\n",
7900 n_sanity_cheap
, n_sanity_expensive
);
7901 VG_(message
)(Vg_DebugMsg
,
7902 " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
7904 n_auxmap_L2_nodes
* 64,
7905 n_auxmap_L2_nodes
/ 16 );
7906 VG_(message
)(Vg_DebugMsg
,
7907 " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
7908 n_auxmap_L1_searches
, n_auxmap_L1_cmps
,
7909 (10ULL * n_auxmap_L1_cmps
)
7910 / (n_auxmap_L1_searches
? n_auxmap_L1_searches
: 1)
7912 VG_(message
)(Vg_DebugMsg
,
7913 " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
7914 n_auxmap_L2_searches
, n_auxmap_L2_nodes
7917 print_SM_info("n_issued ", n_issued_SMs
);
7918 print_SM_info("n_deissued ", n_deissued_SMs
);
7919 print_SM_info("max_noaccess ", max_noaccess_SMs
);
7920 print_SM_info("max_undefined", max_undefined_SMs
);
7921 print_SM_info("max_defined ", max_defined_SMs
);
7922 print_SM_info("max_non_DSM ", max_non_DSM_SMs
);
7924 // Three DSMs, plus the non-DSM ones
7925 max_SMs_szB
= (3 + max_non_DSM_SMs
) * sizeof(SecMap
);
7926 // The 3*sizeof(Word) bytes is the AVL node metadata size.
7927 // The VG_ROUNDUP is because the OSet pool allocator will/must align
7928 // the elements on pointer size.
7929 // Note that the pool allocator has some additional small overhead
7930 // which is not counted in the below.
7931 // Hardwiring this logic sucks, but I don't see how else to do it.
7932 max_secVBit_szB
= max_secVBit_nodes
*
7933 (3*sizeof(Word
) + VG_ROUNDUP(sizeof(SecVBitNode
), sizeof(void*)));
7934 max_shmem_szB
= sizeof(primary_map
) + max_SMs_szB
+ max_secVBit_szB
;
7936 VG_(message
)(Vg_DebugMsg
,
7937 " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
7938 max_secVBit_nodes
, max_secVBit_szB
/ 1024,
7939 max_secVBit_szB
/ (1024 * 1024));
7940 VG_(message
)(Vg_DebugMsg
,
7941 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
7942 sec_vbits_new_nodes
+ sec_vbits_updates
,
7943 sec_vbits_new_nodes
, sec_vbits_updates
);
7944 VG_(message
)(Vg_DebugMsg
,
7945 " memcheck: max shadow mem size: %luk, %luM\n",
7946 max_shmem_szB
/ 1024, max_shmem_szB
/ (1024 * 1024));
7948 if (MC_(clo_mc_level
) >= 3) {
7949 VG_(message
)(Vg_DebugMsg
,
7950 " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
7951 stats_ocacheL1_find
,
7952 stats_ocacheL1_misses
,
7953 stats_ocacheL1_lossage
);
7954 VG_(message
)(Vg_DebugMsg
,
7955 " ocacheL1: %'12lu at 0 %'12lu at 1\n",
7956 stats_ocacheL1_find
- stats_ocacheL1_misses
7957 - stats_ocacheL1_found_at_1
7958 - stats_ocacheL1_found_at_N
,
7959 stats_ocacheL1_found_at_1
);
7960 VG_(message
)(Vg_DebugMsg
,
7961 " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
7962 stats_ocacheL1_found_at_N
,
7963 stats_ocacheL1_movefwds
);
7964 VG_(message
)(Vg_DebugMsg
,
7965 " ocacheL1: %'12lu sizeB %'12d useful\n",
7966 (SizeT
)sizeof(OCache
),
7967 4 * OC_W32S_PER_LINE
* OC_LINES_PER_SET
* OC_N_SETS
);
7968 VG_(message
)(Vg_DebugMsg
,
7969 " ocacheL2: %'12lu refs %'12lu misses\n",
7970 stats__ocacheL2_refs
,
7971 stats__ocacheL2_misses
);
7972 VG_(message
)(Vg_DebugMsg
,
7973 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
7974 stats__ocacheL2_n_nodes_max
,
7975 stats__ocacheL2_n_nodes
);
7976 VG_(message
)(Vg_DebugMsg
,
7977 " niacache: %'12lu refs %'12lu misses\n",
7978 stats__nia_cache_queries
, stats__nia_cache_misses
);
7980 tl_assert(ocacheL1
== NULL
);
7981 tl_assert(ocacheL2
== NULL
);
7986 static void mc_fini ( Int exitcode
)
7988 MC_(xtmemory_report
) (VG_(clo_xtree_memory_file
), True
);
7989 MC_(print_malloc_stats
)();
7991 if (MC_(clo_leak_check
) != LC_Off
) {
7992 LeakCheckParams lcp
;
7993 HChar
* xt_filename
= NULL
;
7994 lcp
.mode
= MC_(clo_leak_check
);
7995 lcp
.show_leak_kinds
= MC_(clo_show_leak_kinds
);
7996 lcp
.heuristics
= MC_(clo_leak_check_heuristics
);
7997 lcp
.errors_for_leak_kinds
= MC_(clo_error_for_leak_kinds
);
7998 lcp
.deltamode
= LCD_Any
;
7999 lcp
.max_loss_records_output
= 999999999;
8000 lcp
.requested_by_monitor_command
= False
;
8001 if (MC_(clo_xtree_leak
)) {
8002 xt_filename
= VG_(expand_file_name
)("--xtree-leak-file",
8003 MC_(clo_xtree_leak_file
));
8004 lcp
.xt_filename
= xt_filename
;
8006 lcp
.show_leak_kinds
= MC_(all_Reachedness
)();
8009 lcp
.xt_filename
= NULL
;
8010 MC_(detect_memory_leaks
)(1/*bogus ThreadId*/, &lcp
);
8011 if (MC_(clo_xtree_leak
))
8012 VG_(free
)(xt_filename
);
8014 if (VG_(clo_verbosity
) == 1 && !VG_(clo_xml
)) {
8016 "For a detailed leak analysis, rerun with: --leak-check=full\n"
8022 if (MC_(any_value_errors
) && !VG_(clo_xml
) && VG_(clo_verbosity
) >= 1
8023 && MC_(clo_mc_level
) == 2) {
8024 VG_(message
)(Vg_UserMsg
,
8025 "Use --track-origins=yes to see where "
8026 "uninitialised values come from\n");
8029 /* Print a warning if any client-request generated ignore-ranges
8030 still exist. It would be reasonable to expect that a properly
8031 written program would remove any such ranges before exiting, and
8032 since they are a bit on the dangerous side, let's comment. By
8033 contrast ranges which are specified on the command line normally
8034 pertain to hardware mapped into the address space, and so we
8035 can't expect the client to have got rid of them. */
8036 if (gIgnoredAddressRanges
) {
8038 for (i
= 0; i
< VG_(sizeRangeMap
)(gIgnoredAddressRanges
); i
++) {
8039 UWord val
= IAR_INVALID
;
8040 UWord key_min
= ~(UWord
)0;
8041 UWord key_max
= (UWord
)0;
8042 VG_(indexRangeMap
)( &key_min
, &key_max
, &val
,
8043 gIgnoredAddressRanges
, i
);
8044 if (val
!= IAR_ClientReq
)
8046 /* Print the offending range. Also, if it is the first,
8047 print a banner before it. */
8051 "WARNING: exiting program has the following client-requested\n"
8052 "WARNING: address error disablement range(s) still in force,\n"
8054 "possibly as a result of some mistake in the use of the\n"
8056 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
8059 VG_(umsg
)(" [%u] 0x%016lx-0x%016lx %s\n",
8060 i
, key_min
, key_max
, showIARKind(val
));
8070 VG_(message
)(Vg_DebugMsg
,
8071 "------ Valgrind's client block stats follow ---------------\n" );
8072 show_client_block_stats();
8076 /* mark the given addr/len unaddressable for watchpoint implementation
8077 The PointKind will be handled at access time */
8078 static Bool
mc_mark_unaddressable_for_watchpoint (PointKind kind
, Bool insert
,
8079 Addr addr
, SizeT len
)
8081 /* GDBTD this is somewhat fishy. We might rather have to save the previous
8082 accessibility and definedness in gdbserver so as to allow restoring it
8083 properly. Currently, we assume that the user only watches things
8084 which are properly addressable and defined */
8086 MC_(make_mem_noaccess
) (addr
, len
);
8088 MC_(make_mem_defined
) (addr
, len
);
8092 static void mc_pre_clo_init(void)
8094 VG_(details_name
) ("Memcheck");
8095 VG_(details_version
) (NULL
);
8096 VG_(details_description
) ("a memory error detector");
8097 VG_(details_copyright_author
)(
8098 "Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.");
8099 VG_(details_bug_reports_to
) (VG_BUGS_TO
);
8100 VG_(details_avg_translation_sizeB
) ( 640 );
8102 VG_(basic_tool_funcs
) (mc_post_clo_init
,
8106 VG_(needs_final_IR_tidy_pass
) ( MC_(final_tidy
) );
8109 VG_(needs_core_errors
) ();
8110 VG_(needs_tool_errors
) (MC_(eq_Error
),
8111 MC_(before_pp_Error
),
8113 True
,/*show TIDs for errors*/
8114 MC_(update_Error_extra
),
8115 MC_(is_recognised_suppression
),
8116 MC_(read_extra_suppression_info
),
8117 MC_(error_matches_suppression
),
8118 MC_(get_error_name
),
8119 MC_(get_extra_suppression_info
),
8120 MC_(print_extra_suppression_use
),
8121 MC_(update_extra_suppression_use
));
8122 VG_(needs_libc_freeres
) ();
8123 VG_(needs_cxx_freeres
) ();
8124 VG_(needs_command_line_options
)(mc_process_cmd_line_options
,
8126 mc_print_debug_usage
);
8127 VG_(needs_client_requests
) (mc_handle_client_request
);
8128 VG_(needs_sanity_checks
) (mc_cheap_sanity_check
,
8129 mc_expensive_sanity_check
);
8130 VG_(needs_print_stats
) (mc_print_stats
);
8131 VG_(needs_info_location
) (MC_(pp_describe_addr
));
8132 VG_(needs_malloc_replacement
) (MC_(malloc
),
8134 MC_(__builtin_vec_new
),
8138 MC_(__builtin_delete
),
8139 MC_(__builtin_vec_delete
),
8141 MC_(malloc_usable_size
),
8142 MC_MALLOC_DEFAULT_REDZONE_SZB
);
8143 MC_(Malloc_Redzone_SzB
) = VG_(malloc_effective_client_redzone_size
)();
8145 VG_(needs_xml_output
) ();
8147 VG_(track_new_mem_startup
) ( mc_new_mem_startup
);
8149 // Handling of mmap and mprotect isn't simple (well, it is simple,
8150 // but the justification isn't.) See comments above, just prior to
8152 VG_(track_new_mem_mmap
) ( mc_new_mem_mmap
);
8153 VG_(track_change_mem_mprotect
) ( mc_new_mem_mprotect
);
8155 VG_(track_copy_mem_remap
) ( MC_(copy_address_range_state
) );
8157 VG_(track_die_mem_stack_signal
)( MC_(make_mem_noaccess
) );
8158 VG_(track_die_mem_brk
) ( MC_(make_mem_noaccess
) );
8159 VG_(track_die_mem_munmap
) ( MC_(make_mem_noaccess
) );
8161 /* Defer the specification of the new_mem_stack functions to the
8162 post_clo_init function, since we need to first parse the command
8163 line before deciding which set to use. */
8165 # ifdef PERF_FAST_STACK
8166 VG_(track_die_mem_stack_4
) ( mc_die_mem_stack_4
);
8167 VG_(track_die_mem_stack_8
) ( mc_die_mem_stack_8
);
8168 VG_(track_die_mem_stack_12
) ( mc_die_mem_stack_12
);
8169 VG_(track_die_mem_stack_16
) ( mc_die_mem_stack_16
);
8170 VG_(track_die_mem_stack_32
) ( mc_die_mem_stack_32
);
8171 VG_(track_die_mem_stack_112
) ( mc_die_mem_stack_112
);
8172 VG_(track_die_mem_stack_128
) ( mc_die_mem_stack_128
);
8173 VG_(track_die_mem_stack_144
) ( mc_die_mem_stack_144
);
8174 VG_(track_die_mem_stack_160
) ( mc_die_mem_stack_160
);
8176 VG_(track_die_mem_stack
) ( mc_die_mem_stack
);
8178 VG_(track_ban_mem_stack
) ( MC_(make_mem_noaccess
) );
8180 VG_(track_pre_mem_read
) ( check_mem_is_defined
);
8181 VG_(track_pre_mem_read_asciiz
) ( check_mem_is_defined_asciiz
);
8182 VG_(track_pre_mem_write
) ( check_mem_is_addressable
);
8183 VG_(track_post_mem_write
) ( mc_post_mem_write
);
8185 VG_(track_post_reg_write
) ( mc_post_reg_write
);
8186 VG_(track_post_reg_write_clientcall_return
)( mc_post_reg_write_clientcall
);
8188 if (MC_(clo_mc_level
) >= 2) {
8189 VG_(track_copy_mem_to_reg
) ( mc_copy_mem_to_reg
);
8190 VG_(track_copy_reg_to_mem
) ( mc_copy_reg_to_mem
);
8193 VG_(needs_watchpoint
) ( mc_mark_unaddressable_for_watchpoint
);
8195 init_shadow_memory();
8196 // MC_(chunk_poolalloc) must be allocated in post_clo_init
8197 tl_assert(MC_(chunk_poolalloc
) == NULL
);
8198 MC_(malloc_list
) = VG_(HT_construct
)( "MC_(malloc_list)" );
8199 MC_(mempool_list
) = VG_(HT_construct
)( "MC_(mempool_list)" );
8202 tl_assert( mc_expensive_sanity_check() );
8204 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
8205 tl_assert(sizeof(UWord
) == sizeof(Addr
));
8206 // Call me paranoid. I don't care.
8207 tl_assert(sizeof(void*) == sizeof(Addr
));
8209 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
8210 tl_assert(-1 != VG_(log2
)(BYTES_PER_SEC_VBIT_NODE
));
8212 /* This is small. Always initialise it. */
8213 init_nia_to_ecu_cache();
8215 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
8216 if we need to, since the command line args haven't been
8217 processed yet. Hence defer it to mc_post_clo_init. */
8218 tl_assert(ocacheL1
== NULL
);
8219 tl_assert(ocacheL2
== NULL
);
8221 /* Check some important stuff. See extensive comments above
8222 re UNALIGNED_OR_HIGH for background. */
8223 # if VG_WORDSIZE == 4
8224 tl_assert(sizeof(void*) == 4);
8225 tl_assert(sizeof(Addr
) == 4);
8226 tl_assert(sizeof(UWord
) == 4);
8227 tl_assert(sizeof(Word
) == 4);
8228 tl_assert(MAX_PRIMARY_ADDRESS
== 0xFFFFFFFFUL
);
8229 tl_assert(MASK(1) == 0UL);
8230 tl_assert(MASK(2) == 1UL);
8231 tl_assert(MASK(4) == 3UL);
8232 tl_assert(MASK(8) == 7UL);
8234 tl_assert(VG_WORDSIZE
== 8);
8235 tl_assert(sizeof(void*) == 8);
8236 tl_assert(sizeof(Addr
) == 8);
8237 tl_assert(sizeof(UWord
) == 8);
8238 tl_assert(sizeof(Word
) == 8);
8239 tl_assert(MAX_PRIMARY_ADDRESS
== 0x1FFFFFFFFFULL
);
8240 tl_assert(MASK(1) == 0xFFFFFFE000000000ULL
);
8241 tl_assert(MASK(2) == 0xFFFFFFE000000001ULL
);
8242 tl_assert(MASK(4) == 0xFFFFFFE000000003ULL
);
8243 tl_assert(MASK(8) == 0xFFFFFFE000000007ULL
);
8246 /* Check some assertions to do with the instrumentation machinery. */
8247 MC_(do_instrumentation_startup_checks
)();
8250 STATIC_ASSERT(sizeof(UWord
) == sizeof(SizeT
));
8252 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init
)
8254 /*--------------------------------------------------------------------*/
8255 /*--- end mc_main.c ---*/
8256 /*--------------------------------------------------------------------*/