coverity: fix a couple of forward nulls
[valgrind.git] / memcheck / mc_main.c
blob3f34e3dc19c53619329b373f47fc90d813bf283c
1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
5 /*--- accessibility (A) and validity (V) status of each byte. ---*/
6 /*--- mc_main.c ---*/
7 /*--------------------------------------------------------------------*/
9 /*
10 This file is part of MemCheck, a heavyweight Valgrind tool for
11 detecting memory errors.
13 Copyright (C) 2000-2017 Julian Seward
14 jseward@acm.org
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, see <http://www.gnu.org/licenses/>.
29 The GNU General Public License is contained in the file COPYING.
32 #include "pub_tool_basics.h"
33 #include "pub_tool_aspacemgr.h"
34 #include "pub_tool_gdbserver.h"
35 #include "pub_tool_poolalloc.h"
36 #include "pub_tool_hashtable.h" // For mc_include.h
37 #include "pub_tool_libcbase.h"
38 #include "pub_tool_libcassert.h"
39 #include "pub_tool_libcprint.h"
40 #include "pub_tool_machine.h"
41 #include "pub_tool_mallocfree.h"
42 #include "pub_tool_options.h"
43 #include "pub_tool_oset.h"
44 #include "pub_tool_rangemap.h"
45 #include "pub_tool_replacemalloc.h"
46 #include "pub_tool_tooliface.h"
47 #include "pub_tool_threadstate.h"
48 #include "pub_tool_xarray.h"
49 #include "pub_tool_xtree.h"
50 #include "pub_tool_xtmemory.h"
52 #include "mc_include.h"
53 #include "memcheck.h" /* for client requests */
55 /* Set to 1 to do a little more sanity checking */
56 #define VG_DEBUG_MEMORY 0
58 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
60 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
61 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
64 /*------------------------------------------------------------*/
65 /*--- Fast-case knobs ---*/
66 /*------------------------------------------------------------*/
68 // Comment these out to disable the fast cases (don't just set them to zero).
70 /* PERF_FAST_LOADV is in mc_include.h */
71 #define PERF_FAST_STOREV 1
73 #define PERF_FAST_SARP 1
75 #define PERF_FAST_STACK 1
76 #define PERF_FAST_STACK2 1
78 /* Change this to 1 to enable assertions on origin tracking cache fast
79 paths */
80 #define OC_ENABLE_ASSERTIONS 0
82 /* Change this to 1 for experimental, higher precision origin tracking
83 8- and 16-bit store handling. */
84 #define OC_PRECISION_STORE 1
87 /*------------------------------------------------------------*/
88 /*--- Comments on the origin tracking implementation ---*/
89 /*------------------------------------------------------------*/
91 /* See detailed comment entitled
92 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
93 which is contained further on in this file. */
96 /*------------------------------------------------------------*/
97 /*--- V bits and A bits ---*/
98 /*------------------------------------------------------------*/
100 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
101 thinks the corresponding value bit is defined. And every memory byte
102 has an A bit, which tracks whether Memcheck thinks the program can access
103 it safely (ie. it's mapped, and has at least one of the RWX permission bits
104 set). So every N-bit register is shadowed with N V bits, and every memory
105 byte is shadowed with 8 V bits and one A bit.
107 In the implementation, we use two forms of compression (compressed V bits
108 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
109 for memory.
111 Memcheck also tracks extra information about each heap block that is
112 allocated, for detecting memory leaks and other purposes.
115 /*------------------------------------------------------------*/
116 /*--- Basic A/V bitmap representation. ---*/
117 /*------------------------------------------------------------*/
119 /* All reads and writes are checked against a memory map (a.k.a. shadow
120 memory), which records the state of all memory in the process.
122 On 32-bit machines the memory map is organised as follows.
123 The top 16 bits of an address are used to index into a top-level
124 map table, containing 65536 entries. Each entry is a pointer to a
125 second-level map, which records the accessibility and validity
126 permissions for the 65536 bytes indexed by the lower 16 bits of the
127 address. Each byte is represented by two bits (details are below). So
128 each second-level map contains 16384 bytes. This two-level arrangement
129 conveniently divides the 4G address space into 64k lumps, each size 64k
130 bytes.
132 All entries in the primary (top-level) map must point to a valid
133 secondary (second-level) map. Since many of the 64kB chunks will
134 have the same status for every bit -- ie. noaccess (for unused
135 address space) or entirely addressable and defined (for code segments) --
136 there are three distinguished secondary maps, which indicate 'noaccess',
137 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
138 map entry points to the relevant distinguished map. In practice,
139 typically more than half of the addressable memory is represented with
140 the 'undefined' or 'defined' distinguished secondary map, so it gives a
141 good saving. It also lets us set the V+A bits of large address regions
142 quickly in set_address_range_perms().
144 On 64-bit machines it's more complicated. If we followed the same basic
145 scheme we'd have a four-level table which would require too many memory
146 accesses. So instead the top-level map table has 2^21 entries (indexed
147 using bits 16..36 of the address); this covers the bottom 128GB. Any
148 accesses above 128GB are handled with a slow, sparse auxiliary table.
149 Valgrind's address space manager tries very hard to keep things below
150 this 128GB barrier so that performance doesn't suffer too much.
152 Note that this file has a lot of different functions for reading and
153 writing shadow memory. Only a couple are strictly necessary (eg.
154 get_vabits2 and set_vabits2), most are just specialised for specific
155 common cases to improve performance.
157 Aside: the V+A bits are less precise than they could be -- we have no way
158 of marking memory as read-only. It would be great if we could add an
159 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
160 which requires 2.3 bits to hold, and there's no way to do that elegantly
161 -- we'd have to double up to 4 bits of metadata per byte, which doesn't
162 seem worth it.
165 /* --------------- Basic configuration --------------- */
167 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. */
169 #if VG_WORDSIZE == 4
171 /* cover the entire address space */
172 # define N_PRIMARY_BITS 16
174 #else
176 /* Just handle the first 128G fast and the rest via auxiliary
177 primaries. If you change this, Memcheck will assert at startup.
178 See the definition of UNALIGNED_OR_HIGH for extensive comments. */
179 # define N_PRIMARY_BITS 21
181 #endif
184 /* Do not change this. */
185 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS)
187 /* Do not change this. */
188 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
191 /* --------------- Secondary maps --------------- */
193 // Each byte of memory conceptually has an A bit, which indicates its
194 // addressability, and 8 V bits, which indicates its definedness.
196 // But because very few bytes are partially defined, we can use a nice
197 // compression scheme to reduce the size of shadow memory. Each byte of
198 // memory has 2 bits which indicates its state (ie. V+A bits):
200 // 00: noaccess (unaddressable but treated as fully defined)
201 // 01: undefined (addressable and fully undefined)
202 // 10: defined (addressable and fully defined)
203 // 11: partdefined (addressable and partially defined)
205 // In the "partdefined" case, we use a secondary table to store the V bits.
206 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
207 // bits.
209 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
210 // four bytes (32 bits) of memory are in each chunk. Hence the name
211 // "vabits8". This lets us get the V+A bits for four bytes at a time
212 // easily (without having to do any shifting and/or masking), and that is a
213 // very common operation. (Note that although each vabits8 chunk
214 // is 8 bits in size, it represents 32 bits of memory.)
216 // The representation is "inverse" little-endian... each 4 bytes of
217 // memory is represented by a 1 byte value, where:
219 // - the status of byte (a+0) is held in bits [1..0]
220 // - the status of byte (a+1) is held in bits [3..2]
221 // - the status of byte (a+2) is held in bits [5..4]
222 // - the status of byte (a+3) is held in bits [7..6]
224 // It's "inverse" because endianness normally describes a mapping from
225 // value bits to memory addresses; in this case the mapping is inverted.
226 // Ie. instead of particular value bits being held in certain addresses, in
227 // this case certain addresses are represented by particular value bits.
228 // See insert_vabits2_into_vabits8() for an example.
230 // But note that we don't compress the V bits stored in registers; they
231 need to be explicit to make the shadow operations possible. Therefore
232 // when moving values between registers and memory we need to convert
233 // between the expanded in-register format and the compressed in-memory
234 // format. This isn't so difficult, it just requires careful attention in a
235 // few places.
237 // These represent eight bits of memory.
238 #define VA_BITS2_NOACCESS 0x0 // 00b
239 #define VA_BITS2_UNDEFINED 0x1 // 01b
240 #define VA_BITS2_DEFINED 0x2 // 10b
241 #define VA_BITS2_PARTDEFINED 0x3 // 11b
243 // These represent 16 bits of memory.
244 #define VA_BITS4_NOACCESS 0x0 // 00_00b
245 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
246 #define VA_BITS4_DEFINED 0xa // 10_10b
248 // These represent 32 bits of memory.
249 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
250 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
251 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
253 // These represent 64 bits of memory.
254 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
255 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
256 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
258 // These represent 128 bits of memory.
259 #define VA_BITS32_UNDEFINED 0x55555555 // 01_01_01_01b x 4
262 #define SM_CHUNKS 16384 // Each SM covers 64k of memory.
263 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
264 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3)
266 // Paranoia: it's critical for performance that the requested inlining
267 // occurs. So try extra hard.
268 #define INLINE inline __attribute__((always_inline))
270 static INLINE Addr start_of_this_sm ( Addr a ) {
271 return (a & (~SM_MASK));
273 static INLINE Bool is_start_of_sm ( Addr a ) {
274 return (start_of_this_sm(a) == a);
277 STATIC_ASSERT(SM_CHUNKS % 2 == 0);
279 typedef
280 union {
281 UChar vabits8[SM_CHUNKS];
282 UShort vabits16[SM_CHUNKS/2];
284 SecMap;
286 // 3 distinguished secondary maps, one for no-access, one for
287 // accessible but undefined, and one for accessible and defined.
288 // Distinguished secondaries may never be modified.
289 #define SM_DIST_NOACCESS 0
290 #define SM_DIST_UNDEFINED 1
291 #define SM_DIST_DEFINED 2
293 static SecMap sm_distinguished[3];
295 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
296 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
299 // Forward declaration
300 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
302 /* dist_sm points to one of our three distinguished secondaries. Make
303 a copy of it so that we can write to it.
305 static SecMap* copy_for_writing ( SecMap* dist_sm )
307 SecMap* new_sm;
308 tl_assert(dist_sm == &sm_distinguished[0]
309 || dist_sm == &sm_distinguished[1]
310 || dist_sm == &sm_distinguished[2]);
312 SysRes sres = VG_(am_shadow_alloc)(sizeof(SecMap));
313 if (sr_isError(sres))
314 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
315 sizeof(SecMap), sr_Err(sres) );
316 new_sm = (void *)(Addr)sr_Res(sres);
317 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
318 update_SM_counts(dist_sm, new_sm);
319 return new_sm;
322 /* --------------- Stats --------------- */
324 static Int n_issued_SMs = 0;
325 static Int n_deissued_SMs = 0;
326 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs
327 static Int n_undefined_SMs = 0;
328 static Int n_defined_SMs = 0;
329 static Int n_non_DSM_SMs = 0;
330 static Int max_noaccess_SMs = 0;
331 static Int max_undefined_SMs = 0;
332 static Int max_defined_SMs = 0;
333 static Int max_non_DSM_SMs = 0;
335 /* # searches initiated in auxmap_L1, and # base cmps required */
336 static ULong n_auxmap_L1_searches = 0;
337 static ULong n_auxmap_L1_cmps = 0;
338 /* # of searches that missed in auxmap_L1 and therefore had to
339 be handed to auxmap_L2. And the number of nodes inserted. */
340 static ULong n_auxmap_L2_searches = 0;
341 static ULong n_auxmap_L2_nodes = 0;
343 static Int n_sanity_cheap = 0;
344 static Int n_sanity_expensive = 0;
346 static Int n_secVBit_nodes = 0;
347 static Int max_secVBit_nodes = 0;
349 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
351 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
352 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
353 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --;
354 else { n_non_DSM_SMs --;
355 n_deissued_SMs ++; }
357 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
358 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
359 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++;
360 else { n_non_DSM_SMs ++;
361 n_issued_SMs ++; }
363 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs;
364 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
365 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs;
366 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs;
369 /* --------------- Primary maps --------------- */
371 /* The main primary map. This covers some initial part of the address
372 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
373 handled using the auxiliary primary map.
375 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
376 && (defined(VGP_arm_linux) \
377 || defined(VGP_x86_linux) || defined(VGP_x86_solaris) || defined(VGP_x86_freebsd))
378 /* mc_main_asm.c needs visibility on a few things declared in this file.
379 MC_MAIN_STATIC allows to define them static if ok, i.e. on
380 platforms that are not using hand-coded asm statements. */
381 #define MC_MAIN_STATIC
382 #else
383 #define MC_MAIN_STATIC static
384 #endif
385 MC_MAIN_STATIC SecMap* primary_map[N_PRIMARY_MAP];
388 /* An entry in the auxiliary primary map. base must be a 64k-aligned
389 value, and sm points at the relevant secondary map. As with the
390 main primary map, the secondary may be either a real secondary, or
391 one of the three distinguished secondaries. DO NOT CHANGE THIS
392 LAYOUT: the first word has to be the key for OSet fast lookups.
394 typedef
395 struct {
396 Addr base;
397 SecMap* sm;
399 AuxMapEnt;
401 /* Tunable parameter: How big is the L1 queue? */
402 #define N_AUXMAP_L1 24
404 /* Tunable parameter: How far along the L1 queue to insert
405 entries resulting from L2 lookups? */
406 #define AUXMAP_L1_INSERT_IX 12
408 static struct {
409 Addr base;
410 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
412 auxmap_L1[N_AUXMAP_L1];
414 static OSet* auxmap_L2 = NULL;
416 static void init_auxmap_L1_L2 ( void )
418 Int i;
419 for (i = 0; i < N_AUXMAP_L1; i++) {
420 auxmap_L1[i].base = 0;
421 auxmap_L1[i].ent = NULL;
424 tl_assert(0 == offsetof(AuxMapEnt,base));
425 tl_assert(sizeof(Addr) == sizeof(void*));
426 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
427 /*fastCmp*/ NULL,
428 VG_(malloc), "mc.iaLL.1", VG_(free) );
431 /* Check representation invariants; if OK return NULL; else a
432 descriptive bit of text. Also return the number of
433 non-distinguished secondary maps referred to from the auxiliary
434 primary maps. */
436 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
438 Word i, j;
439 /* On a 32-bit platform, the L2 and L1 tables should
440 both remain empty forever.
442 On a 64-bit platform:
443 In the L2 table:
444 all .base & 0xFFFF == 0
445 all .base > MAX_PRIMARY_ADDRESS
446 In the L1 table:
447 all .base & 0xFFFF == 0
448 all (.base > MAX_PRIMARY_ADDRESS
449 .base & 0xFFFF == 0
450 and .ent points to an AuxMapEnt with the same .base)
452 (.base == 0 and .ent == NULL)
454 *n_secmaps_found = 0;
455 if (sizeof(void*) == 4) {
456 /* 32-bit platform */
457 if (VG_(OSetGen_Size)(auxmap_L2) != 0)
458 return "32-bit: auxmap_L2 is non-empty";
459 for (i = 0; i < N_AUXMAP_L1; i++)
460 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
461 return "32-bit: auxmap_L1 is non-empty";
462 } else {
463 /* 64-bit platform */
464 UWord elems_seen = 0;
465 AuxMapEnt *elem, *res;
466 AuxMapEnt key;
467 /* L2 table */
468 VG_(OSetGen_ResetIter)(auxmap_L2);
469 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
470 elems_seen++;
471 if (0 != (elem->base & (Addr)0xFFFF))
472 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
473 if (elem->base <= MAX_PRIMARY_ADDRESS)
474 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
475 if (elem->sm == NULL)
476 return "64-bit: .sm in _L2 is NULL";
477 if (!is_distinguished_sm(elem->sm))
478 (*n_secmaps_found)++;
480 if (elems_seen != n_auxmap_L2_nodes)
481 return "64-bit: disagreement on number of elems in _L2";
482 /* Check L1-L2 correspondence */
483 for (i = 0; i < N_AUXMAP_L1; i++) {
484 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
485 continue;
486 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
487 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
488 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
489 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
490 if (auxmap_L1[i].ent == NULL)
491 return "64-bit: .ent is NULL in auxmap_L1";
492 if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
493 return "64-bit: _L1 and _L2 bases are inconsistent";
494 /* Look it up in auxmap_L2. */
495 key.base = auxmap_L1[i].base;
496 key.sm = 0;
497 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
498 if (res == NULL)
499 return "64-bit: _L1 .base not found in _L2";
500 if (res != auxmap_L1[i].ent)
501 return "64-bit: _L1 .ent disagrees with _L2 entry";
503 /* Check L1 contains no duplicates */
504 for (i = 0; i < N_AUXMAP_L1; i++) {
505 if (auxmap_L1[i].base == 0)
506 continue;
507 for (j = i+1; j < N_AUXMAP_L1; j++) {
508 if (auxmap_L1[j].base == 0)
509 continue;
510 if (auxmap_L1[j].base == auxmap_L1[i].base)
511 return "64-bit: duplicate _L1 .base entries";
515 return NULL; /* ok */
518 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
520 Word i;
521 tl_assert(ent);
522 tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
523 for (i = N_AUXMAP_L1-1; i > rank; i--)
524 auxmap_L1[i] = auxmap_L1[i-1];
525 auxmap_L1[rank].base = ent->base;
526 auxmap_L1[rank].ent = ent;
529 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
531 AuxMapEnt key;
532 AuxMapEnt* res;
533 Word i;
535 tl_assert(a > MAX_PRIMARY_ADDRESS);
536 a &= ~(Addr)0xFFFF;
538 /* First search the front-cache, which is a self-organising
539 list containing the most popular entries. */
541 if (LIKELY(auxmap_L1[0].base == a))
542 return auxmap_L1[0].ent;
543 if (LIKELY(auxmap_L1[1].base == a)) {
544 Addr t_base = auxmap_L1[0].base;
545 AuxMapEnt* t_ent = auxmap_L1[0].ent;
546 auxmap_L1[0].base = auxmap_L1[1].base;
547 auxmap_L1[0].ent = auxmap_L1[1].ent;
548 auxmap_L1[1].base = t_base;
549 auxmap_L1[1].ent = t_ent;
550 return auxmap_L1[0].ent;
553 n_auxmap_L1_searches++;
555 for (i = 0; i < N_AUXMAP_L1; i++) {
556 if (auxmap_L1[i].base == a) {
557 break;
560 tl_assert(i >= 0 && i <= N_AUXMAP_L1);
562 n_auxmap_L1_cmps += (ULong)(i+1);
564 if (i < N_AUXMAP_L1) {
565 if (i > 0) {
566 Addr t_base = auxmap_L1[i-1].base;
567 AuxMapEnt* t_ent = auxmap_L1[i-1].ent;
568 auxmap_L1[i-1].base = auxmap_L1[i-0].base;
569 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent;
570 auxmap_L1[i-0].base = t_base;
571 auxmap_L1[i-0].ent = t_ent;
572 i--;
574 return auxmap_L1[i].ent;
577 n_auxmap_L2_searches++;
579 /* First see if we already have it. */
580 key.base = a;
581 key.sm = 0;
583 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
584 if (res)
585 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
586 return res;
589 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
591 AuxMapEnt *nyu, *res;
593 /* First see if we already have it. */
594 res = maybe_find_in_auxmap( a );
595 if (LIKELY(res))
596 return res;
598 /* Ok, there's no entry in the secondary map, so we'll have
599 to allocate one. */
600 a &= ~(Addr)0xFFFF;
602 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
603 nyu->base = a;
604 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS];
605 VG_(OSetGen_Insert)( auxmap_L2, nyu );
606 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
607 n_auxmap_L2_nodes++;
608 return nyu;
611 /* --------------- SecMap fundamentals --------------- */
613 // In all these, 'low' means it's definitely in the main primary map,
614 // 'high' means it's definitely in the auxiliary table.
616 static INLINE UWord get_primary_map_low_offset ( Addr a )
618 UWord pm_off = a >> 16;
619 return pm_off;
622 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
624 UWord pm_off = a >> 16;
625 # if VG_DEBUG_MEMORY >= 1
626 tl_assert(pm_off < N_PRIMARY_MAP);
627 # endif
628 return &primary_map[ pm_off ];
631 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
633 AuxMapEnt* am = find_or_alloc_in_auxmap(a);
634 return &am->sm;
637 static INLINE SecMap** get_secmap_ptr ( Addr a )
639 return ( a <= MAX_PRIMARY_ADDRESS
640 ? get_secmap_low_ptr(a)
641 : get_secmap_high_ptr(a));
644 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
646 return *get_secmap_low_ptr(a);
649 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
651 return *get_secmap_high_ptr(a);
654 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
656 SecMap** p = get_secmap_low_ptr(a);
657 if (UNLIKELY(is_distinguished_sm(*p)))
658 *p = copy_for_writing(*p);
659 return *p;
662 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
664 SecMap** p = get_secmap_high_ptr(a);
665 if (UNLIKELY(is_distinguished_sm(*p)))
666 *p = copy_for_writing(*p);
667 return *p;
670 /* Produce the secmap for 'a', either from the primary map or by
671 ensuring there is an entry for it in the aux primary map. The
672 secmap may be a distinguished one as the caller will only want to
673 be able to read it.
675 static INLINE SecMap* get_secmap_for_reading ( Addr a )
677 return ( a <= MAX_PRIMARY_ADDRESS
678 ? get_secmap_for_reading_low (a)
679 : get_secmap_for_reading_high(a) );
682 /* Produce the secmap for 'a', either from the primary map or by
683 ensuring there is an entry for it in the aux primary map. The
684 secmap may not be a distinguished one, since the caller will want
685 to be able to write it. If it is a distinguished secondary, make a
686 writable copy of it, install it, and return the copy instead. (COW
687 semantics).
689 static INLINE SecMap* get_secmap_for_writing ( Addr a )
691 return ( a <= MAX_PRIMARY_ADDRESS
692 ? get_secmap_for_writing_low (a)
693 : get_secmap_for_writing_high(a) );
696 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
697 allocate one if one doesn't already exist. This is used by the
698 leak checker.
700 static SecMap* maybe_get_secmap_for ( Addr a )
702 if (a <= MAX_PRIMARY_ADDRESS) {
703 return get_secmap_for_reading_low(a);
704 } else {
705 AuxMapEnt* am = maybe_find_in_auxmap(a);
706 return am ? am->sm : NULL;
710 /* --------------- Fundamental functions --------------- */
712 static INLINE
713 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
715 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
716 *vabits8 &= ~(0x3 << shift); // mask out the two old bits
717 *vabits8 |= (vabits2 << shift); // mask in the two new bits
720 static INLINE
721 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
723 UInt shift;
724 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
725 shift = (a & 2) << 1; // shift by 0 or 4
726 *vabits8 &= ~(0xf << shift); // mask out the four old bits
727 *vabits8 |= (vabits4 << shift); // mask in the four new bits
730 static INLINE
731 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
733 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
734 vabits8 >>= shift; // shift the two bits to the bottom
735 return 0x3 & vabits8; // mask out the rest
738 static INLINE
739 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
741 UInt shift;
742 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
743 shift = (a & 2) << 1; // shift by 0 or 4
744 vabits8 >>= shift; // shift the four bits to the bottom
745 return 0xf & vabits8; // mask out the rest
748 // Note that these four are only used in slow cases. The fast cases do
749 // clever things like combine the auxmap check (in
750 // get_secmap_for_{reading,writing}) with alignment checks.
752 // *** WARNING! ***
753 // Any time this function is called, if it is possible that vabits2
754 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
755 // sec-V-bits table must also be set!
756 static INLINE
757 void set_vabits2 ( Addr a, UChar vabits2 )
759 SecMap* sm = get_secmap_for_writing(a);
760 UWord sm_off = SM_OFF(a);
761 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
764 static INLINE
765 UChar get_vabits2 ( Addr a )
767 SecMap* sm = get_secmap_for_reading(a);
768 UWord sm_off = SM_OFF(a);
769 UChar vabits8 = sm->vabits8[sm_off];
770 return extract_vabits2_from_vabits8(a, vabits8);
773 // *** WARNING! ***
774 // Any time this function is called, if it is possible that any of the
775 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
776 // corresponding entry(s) in the sec-V-bits table must also be set!
777 static INLINE
778 UChar get_vabits8_for_aligned_word32 ( Addr a )
780 SecMap* sm = get_secmap_for_reading(a);
781 UWord sm_off = SM_OFF(a);
782 UChar vabits8 = sm->vabits8[sm_off];
783 return vabits8;
786 static INLINE
787 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
789 SecMap* sm = get_secmap_for_writing(a);
790 UWord sm_off = SM_OFF(a);
791 sm->vabits8[sm_off] = vabits8;
795 // Forward declarations
796 static UWord get_sec_vbits8(Addr a);
797 static void set_sec_vbits8(Addr a, UWord vbits8);
799 // Returns False if there was an addressability error.
800 static INLINE
801 Bool set_vbits8 ( Addr a, UChar vbits8 )
803 Bool ok = True;
804 UChar vabits2 = get_vabits2(a);
805 if ( VA_BITS2_NOACCESS != vabits2 ) {
806 // Addressable. Convert in-register format to in-memory format.
807 // Also remove any existing sec V bit entry for the byte if no
808 // longer necessary.
809 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; }
810 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
811 else { vabits2 = VA_BITS2_PARTDEFINED;
812 set_sec_vbits8(a, vbits8); }
813 set_vabits2(a, vabits2);
815 } else {
816 // Unaddressable! Do nothing -- when writing to unaddressable
817 // memory it acts as a black hole, and the V bits can never be seen
818 // again. So we don't have to write them at all.
819 ok = False;
821 return ok;
824 // Returns False if there was an addressability error. In that case, we put
825 // all defined bits into vbits8.
826 static INLINE
827 Bool get_vbits8 ( Addr a, UChar* vbits8 )
829 Bool ok = True;
830 UChar vabits2 = get_vabits2(a);
832 // Convert the in-memory format to in-register format.
833 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; }
834 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
835 else if ( VA_BITS2_NOACCESS == vabits2 ) {
836 *vbits8 = V_BITS8_DEFINED; // Make V bits defined!
837 ok = False;
838 } else {
839 tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
840 *vbits8 = get_sec_vbits8(a);
842 return ok;
846 /* --------------- Secondary V bit table ------------ */
848 // This table holds the full V bit pattern for partially-defined bytes
849 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
850 // memory.
852 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
853 // then overwrite the same address with a fully defined byte, the sec-V-bit
854 // node will not necessarily be removed. This is because checking for
855 // whether removal is necessary would slow down the fast paths.
857 // To avoid the stale nodes building up too much, we periodically (once the
858 // table reaches a certain size) garbage collect (GC) the table by
859 // traversing it and evicting any nodes not having PDB.
860 // If more than a certain proportion of nodes survived, we increase the
861 // table size so that GCs occur less often.
863 // This policy is designed to avoid bad table bloat in the worst case where
864 // a program creates huge numbers of stale PDBs -- we would get this bloat
865 // if we had no GC -- while handling well the case where a node becomes
866 // stale but shortly afterwards is rewritten with a PDB and so becomes
867 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
868 // remove all stale nodes as soon as possible, we just end up re-adding a
869 // lot of them again later. The "sufficiently stale" approach avoids
870 // this. (If a program has many live PDBs, performance will just suck,
871 // there's no way around that.)
873 // Further comments, JRS 14 Feb 2012. It turns out that the policy of
874 // holding on to stale entries for 2 GCs before discarding them can lead
875 // to massive space leaks. So we're changing to an arrangement where
876 // lines are evicted as soon as they are observed to be stale during a
877 // GC. This also has a side benefit of allowing the sufficiently_stale
878 // field to be removed from the SecVBitNode struct, reducing its size by
879 // 8 bytes, which is a substantial space saving considering that the
880 // struct was previously 32 or so bytes, on a 64 bit target.
882 // In order to try and mitigate the problem that the "sufficiently stale"
883 // heuristic was designed to avoid, the table size is allowed to drift
884 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This
885 // means that nodes will exist in the table longer on average, and hopefully
886 // will be deleted and re-added less frequently.
888 // The previous scaling up mechanism (now called STEPUP) is retained:
889 // if residency exceeds 50%, the table is scaled up, although by a
890 // factor sqrt(2) rather than 2 as before. This effectively doubles the
891 // frequency of GCs when there are many PDBs and reduces the tendency of
892 // stale PDBs to reside for long periods in the table.
894 static OSet* secVBitTable;
896 // Stats
897 static ULong sec_vbits_new_nodes = 0;
898 static ULong sec_vbits_updates = 0;
900 // This must be a power of two; this is checked in mc_pre_clo_init().
901 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
902 // a larger address range) they take more space but we can get multiple
903 // partially-defined bytes in one if they are close to each other, reducing
904 // the number of total nodes. In practice sometimes they are clustered (eg.
905 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
906 // row), but often not. So we choose something intermediate.
907 #define BYTES_PER_SEC_VBIT_NODE 16
909 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
910 // more than this many nodes survive a GC.
911 #define STEPUP_SURVIVOR_PROPORTION 0.5
912 #define STEPUP_GROWTH_FACTOR 1.414213562
914 // If the above heuristic doesn't apply, then we may make the table
915 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
916 // this many nodes survive a GC, _and_ the total table size does
917 // not exceed a fixed limit. The numbers are somewhat arbitrary, but
918 // work tolerably well on long Firefox runs. The scaleup ratio of 1.5%
919 // effectively although gradually reduces residency and increases time
920 // between GCs for programs with small numbers of PDBs. The 80000 limit
921 // effectively limits the table size to around 2MB for programs with
922 // small numbers of PDBs, whilst giving a reasonably long lifetime to
923 // entries, to try and reduce the costs resulting from deleting and
924 // re-adding of entries.
925 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
926 #define DRIFTUP_GROWTH_FACTOR 1.015
927 #define DRIFTUP_MAX_SIZE 80000
929 // We GC the table when it gets this many nodes in it, ie. it's effectively
930 // the table size. It can change.
931 static Int secVBitLimit = 1000;
933 // The number of GCs done, used to age sec-V-bit nodes for eviction.
934 // Because it's unsigned, wrapping doesn't matter -- the right answer will
935 // come out anyway.
936 static UInt GCs_done = 0;
938 typedef
939 struct {
940 Addr a;
941 UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
943 SecVBitNode;
945 static OSet* createSecVBitTable(void)
947 OSet* newSecVBitTable;
948 newSecVBitTable = VG_(OSetGen_Create_With_Pool)
949 ( offsetof(SecVBitNode, a),
950 NULL, // use fast comparisons
951 VG_(malloc), "mc.cSVT.1 (sec VBit table)",
952 VG_(free),
953 1000,
954 sizeof(SecVBitNode));
955 return newSecVBitTable;
958 static void gcSecVBitTable(void)
960 OSet* secVBitTable2;
961 SecVBitNode* n;
962 Int i, n_nodes = 0, n_survivors = 0;
964 GCs_done++;
966 // Create the new table.
967 secVBitTable2 = createSecVBitTable();
969 // Traverse the table, moving fresh nodes into the new table.
970 VG_(OSetGen_ResetIter)(secVBitTable);
971 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
972 // Keep node if any of its bytes are non-stale. Using
973 // get_vabits2() for the lookup is not very efficient, but I don't
974 // think it matters.
975 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
976 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
977 // Found a non-stale byte, so keep =>
978 // Insert a copy of the node into the new table.
979 SecVBitNode* n2 =
980 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
981 *n2 = *n;
982 VG_(OSetGen_Insert)(secVBitTable2, n2);
983 break;
988 // Get the before and after sizes.
989 n_nodes = VG_(OSetGen_Size)(secVBitTable);
990 n_survivors = VG_(OSetGen_Size)(secVBitTable2);
992 // Destroy the old table, and put the new one in its place.
993 VG_(OSetGen_Destroy)(secVBitTable);
994 secVBitTable = secVBitTable2;
996 if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
997 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
998 n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
1001 // Increase table size if necessary.
1002 if ((Double)n_survivors
1003 > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
1004 secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
1005 if (VG_(clo_verbosity) > 1)
1006 VG_(message)(Vg_DebugMsg,
1007 "memcheck GC: %d new table size (stepup)\n",
1008 secVBitLimit);
1010 else
1011 if (secVBitLimit < DRIFTUP_MAX_SIZE
1012 && (Double)n_survivors
1013 > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
1014 secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
1015 if (VG_(clo_verbosity) > 1)
1016 VG_(message)(Vg_DebugMsg,
1017 "memcheck GC: %d new table size (driftup)\n",
1018 secVBitLimit);
1022 static UWord get_sec_vbits8(Addr a)
1024 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1025 Int amod = a % BYTES_PER_SEC_VBIT_NODE;
1026 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1027 UChar vbits8;
1028 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1029 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1030 // make it to the secondary V bits table.
1031 vbits8 = n->vbits8[amod];
1032 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1033 return vbits8;
1036 static void set_sec_vbits8(Addr a, UWord vbits8)
1038 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1039 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE;
1040 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1041 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1042 // make it to the secondary V bits table.
1043 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1044 if (n) {
1045 n->vbits8[amod] = vbits8; // update
1046 sec_vbits_updates++;
1047 } else {
1048 // Do a table GC if necessary. Nb: do this before creating and
1049 // inserting the new node, to avoid erroneously GC'ing the new node.
1050 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1051 gcSecVBitTable();
1054 // New node: assign the specific byte, make the rest invalid (they
1055 // should never be read as-is, but be cautious).
1056 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1057 n->a = aAligned;
1058 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1059 n->vbits8[i] = V_BITS8_UNDEFINED;
1061 n->vbits8[amod] = vbits8;
1063 // Insert the new node.
1064 VG_(OSetGen_Insert)(secVBitTable, n);
1065 sec_vbits_new_nodes++;
1067 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1068 if (n_secVBit_nodes > max_secVBit_nodes)
1069 max_secVBit_nodes = n_secVBit_nodes;
1073 /* --------------- Endianness helpers --------------- */
1075 /* Returns the offset in memory of the byteno-th most significant byte
1076 in a wordszB-sized word, given the specified endianness. */
1077 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1078 UWord byteno ) {
1079 return bigendian ? (wordszB-1-byteno) : byteno;
1083 /* --------------- Ignored address ranges --------------- */
/* Address-error-reportability status of an address range:
   IAR_INVALID:     placeholder; never a legitimate status
   IAR_NotIgnored:  the usual case -- report errors in this range
   IAR_CommandLine: don't report errors -- from command line setting
   IAR_ClientReq:   don't report errors -- from client request
*/
typedef enum {
   IAR_INVALID = 99,
   IAR_NotIgnored,
   IAR_CommandLine,
   IAR_ClientReq
} IARKind;
1097 static const HChar* showIARKind ( IARKind iark )
1099 switch (iark) {
1100 case IAR_INVALID: return "INVALID";
1101 case IAR_NotIgnored: return "NotIgnored";
1102 case IAR_CommandLine: return "CommandLine";
1103 case IAR_ClientReq: return "ClientReq";
1104 default: return "???";
1108 // RangeMap<IARKind>
1109 static RangeMap* gIgnoredAddressRanges = NULL;
1111 static void init_gIgnoredAddressRanges ( void )
1113 if (LIKELY(gIgnoredAddressRanges != NULL))
1114 return;
1115 gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
1116 VG_(free), IAR_NotIgnored );
1119 Bool MC_(in_ignored_range) ( Addr a )
1121 if (LIKELY(gIgnoredAddressRanges == NULL))
1122 return False;
1123 UWord how = IAR_INVALID;
1124 UWord key_min = ~(UWord)0;
1125 UWord key_max = (UWord)0;
1126 VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
1127 tl_assert(key_min <= a && a <= key_max);
1128 switch (how) {
1129 case IAR_NotIgnored: return False;
1130 case IAR_CommandLine: return True;
1131 case IAR_ClientReq: return True;
1132 default: break; /* invalid */
1134 VG_(tool_panic)("MC_(in_ignore_range)");
1135 /*NOTREACHED*/
1138 Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB )
1140 if (LIKELY(!MC_(clo_ignore_range_below_sp)))
1141 return False;
1142 tl_assert(szB >= 1 && szB <= 32);
1143 tl_assert(MC_(clo_ignore_range_below_sp__first_offset)
1144 > MC_(clo_ignore_range_below_sp__last_offset));
1145 Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset);
1146 Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset);
1147 if (range_lo >= range_hi) {
1148 /* Bizarre. We have a wraparound situation. What should we do? */
1149 return False; // Play safe
1150 } else {
1151 /* This is the expected case. */
1152 if (range_lo <= a && a + szB - 1 <= range_hi)
1153 return True;
1154 else
1155 return False;
1157 /*NOTREACHED*/
1158 tl_assert(0);
1161 /* Parse two Addrs (in hex) separated by a dash, or fail. */
1163 static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 )
1165 Bool ok = VG_(parse_Addr) (ppc, result1);
1166 if (!ok)
1167 return False;
1168 if (**ppc != '-')
1169 return False;
1170 (*ppc)++;
1171 ok = VG_(parse_Addr) (ppc, result2);
1172 if (!ok)
1173 return False;
1174 return True;
1177 /* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
1178 or fail. */
1180 static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 )
1182 Bool ok = VG_(parse_UInt) (ppc, result1);
1183 if (!ok)
1184 return False;
1185 if (**ppc != '-')
1186 return False;
1187 (*ppc)++;
1188 ok = VG_(parse_UInt) (ppc, result2);
1189 if (!ok)
1190 return False;
1191 return True;
1194 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1195 fail. If they are valid, add them to the global set of ignored
1196 ranges. */
1197 static Bool parse_ignore_ranges ( const HChar* str0 )
1199 init_gIgnoredAddressRanges();
1200 const HChar* str = str0;
1201 const HChar** ppc = &str;
1202 while (1) {
1203 Addr start = ~(Addr)0;
1204 Addr end = (Addr)0;
1205 Bool ok = parse_Addr_pair(ppc, &start, &end);
1206 if (!ok)
1207 return False;
1208 if (start > end)
1209 return False;
1210 VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
1211 if (**ppc == 0)
1212 return True;
1213 if (**ppc != ',')
1214 return False;
1215 (*ppc)++;
1217 /*NOTREACHED*/
1218 return False;
1221 /* Add or remove [start, +len) from the set of ignored ranges. */
1222 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
1224 init_gIgnoredAddressRanges();
1225 const Bool verbose = (VG_(clo_verbosity) > 1);
1226 if (len == 0) {
1227 return False;
1229 if (addRange) {
1230 VG_(bindRangeMap)(gIgnoredAddressRanges,
1231 start, start+len-1, IAR_ClientReq);
1232 if (verbose)
1233 VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
1234 (void*)start, (void*)(start+len-1));
1235 } else {
1236 VG_(bindRangeMap)(gIgnoredAddressRanges,
1237 start, start+len-1, IAR_NotIgnored);
1238 if (verbose)
1239 VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
1240 (void*)start, (void*)(start+len-1));
1242 if (verbose) {
1243 VG_(dmsg)("memcheck: now have %u ranges:\n",
1244 VG_(sizeRangeMap)(gIgnoredAddressRanges));
1245 UInt i;
1246 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
1247 UWord val = IAR_INVALID;
1248 UWord key_min = ~(UWord)0;
1249 UWord key_max = (UWord)0;
1250 VG_(indexRangeMap)( &key_min, &key_max, &val,
1251 gIgnoredAddressRanges, i );
1252 VG_(dmsg)("memcheck: [%u] %016lx-%016lx %s\n",
1253 i, key_min, key_max, showIARKind(val));
1256 return True;
1260 /* --------------- Load/store slow cases. --------------- */
1262 static
1263 __attribute__((noinline))
1264 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
1265 Addr a, SizeT nBits, Bool bigendian )
1267 ULong pessim[4]; /* only used when p-l-ok=yes */
1268 SSizeT szB = nBits / 8;
1269 SSizeT szL = szB / 8; /* Size in Longs (64-bit units) */
1270 SSizeT i, j; /* Must be signed. */
1271 SizeT n_addrs_bad = 0;
1272 Addr ai;
1273 UChar vbits8;
1274 Bool ok;
1276 /* Code below assumes load size is a power of two and at least 64
1277 bits. */
1278 tl_assert((szB & (szB-1)) == 0 && szL > 0);
1280 /* If this triggers, you probably just need to increase the size of
1281 the pessim array. */
1282 tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
1284 for (j = 0; j < szL; j++) {
1285 pessim[j] = V_BITS64_DEFINED;
1286 res[j] = V_BITS64_UNDEFINED;
1289 /* Make up a result V word, which contains the loaded data for
1290 valid addresses and Defined for invalid addresses. Iterate over
1291 the bytes in the word, from the most significant down to the
1292 least. The vbits to return are calculated into vbits128. Also
1293 compute the pessimising value to be used when
1294 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1295 info can be gleaned from the pessim array) but is used as a
1296 cross-check. */
1297 for (j = szL-1; j >= 0; j--) {
1298 ULong vbits64 = V_BITS64_UNDEFINED;
1299 ULong pessim64 = V_BITS64_DEFINED;
1300 UWord long_index = byte_offset_w(szL, bigendian, j);
1301 for (i = 8-1; i >= 0; i--) {
1302 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
1303 ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
1304 ok = get_vbits8(ai, &vbits8);
1305 vbits64 <<= 8;
1306 vbits64 |= vbits8;
1307 if (!ok) n_addrs_bad++;
1308 pessim64 <<= 8;
1309 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1311 res[long_index] = vbits64;
1312 pessim[long_index] = pessim64;
1315 /* In the common case, all the addresses involved are valid, so we
1316 just return the computed V bits and have done. */
1317 if (LIKELY(n_addrs_bad == 0))
1318 return;
1320 /* If there's no possibility of getting a partial-loads-ok
1321 exemption, report the error and quit. */
1322 if (!MC_(clo_partial_loads_ok)) {
1323 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1324 return;
1327 /* The partial-loads-ok excemption might apply. Find out if it
1328 does. If so, don't report an addressing error, but do return
1329 Undefined for the bytes that are out of range, so as to avoid
1330 false negatives. If it doesn't apply, just report an addressing
1331 error in the usual way. */
1333 /* Some code steps along byte strings in aligned chunks
1334 even when there is only a partially defined word at the end (eg,
1335 optimised strlen). This is allowed by the memory model of
1336 modern machines, since an aligned load cannot span two pages and
1337 thus cannot "partially fault".
1339 Therefore, a load from a partially-addressible place is allowed
1340 if all of the following hold:
1341 - the command-line flag is set [by default, it isn't]
1342 - it's an aligned load
1343 - at least one of the addresses in the word *is* valid
1345 Since this suppresses the addressing error, we avoid false
1346 negatives by marking bytes undefined when they come from an
1347 invalid address.
1350 /* "at least one of the addresses is invalid" */
1351 ok = False;
1352 for (j = 0; j < szL; j++)
1353 ok |= pessim[j] != V_BITS64_DEFINED;
1354 tl_assert(ok);
1356 # if defined(VGP_s390x_linux)
1357 tl_assert(szB == 16); // s390 doesn't have > 128 bit SIMD
1358 /* OK if all loaded bytes are from the same page. */
1359 Bool alignedOK = ((a & 0xfff) <= 0x1000 - szB);
1360 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
1361 /* lxvd2x might generate an unaligned 128 bit vector load. */
1362 Bool alignedOK = (szB == 16);
1363 # else
1364 /* OK if the address is aligned by the load size. */
1365 Bool alignedOK = (0 == (a & (szB - 1)));
1366 # endif
1368 if (alignedOK && n_addrs_bad < szB) {
1369 /* Exemption applies. Use the previously computed pessimising
1370 value and return the combined result, but don't flag an
1371 addressing error. The pessimising value is Defined for valid
1372 addresses and Undefined for invalid addresses. */
1373 /* for assumption that doing bitwise or implements UifU */
1374 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1375 /* (really need "UifU" here...)
1376 vbits[j] UifU= pessim[j] (is pessimised by it, iow) */
1377 for (j = szL-1; j >= 0; j--)
1378 res[j] |= pessim[j];
1379 return;
1382 /* Exemption doesn't apply. Flag an addressing error in the normal
1383 way. */
1384 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1387 MC_MAIN_STATIC
1388 __attribute__((noinline))
1389 __attribute__((used))
1390 VG_REGPARM(3)
1391 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian );
1393 MC_MAIN_STATIC
1394 __attribute__((noinline))
1395 __attribute__((used))
1396 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
1397 this function may get called from hand written assembly. */
1398 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1400 PROF_EVENT(MCPE_LOADVN_SLOW);
1402 /* ------------ BEGIN semi-fast cases ------------ */
1403 /* These deal quickly-ish with the common auxiliary primary map
1404 cases on 64-bit platforms. Are merely a speedup hack; can be
1405 omitted without loss of correctness/functionality. Note that in
1406 both cases the "sizeof(void*) == 8" causes these cases to be
1407 folded out by compilers on 32-bit platforms. These are derived
1408 from LOADV64 and LOADV32.
1411 # if defined(VGA_mips64) && defined(VGABI_N32)
1412 if (LIKELY(sizeof(void*) == 4 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1413 # else
1414 if (LIKELY(sizeof(void*) == 8 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1415 # endif
1417 SecMap* sm = get_secmap_for_reading(a);
1418 UWord sm_off16 = SM_OFF_16(a);
1419 UWord vabits16 = sm->vabits16[sm_off16];
1420 if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1421 return V_BITS64_DEFINED;
1422 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1423 return V_BITS64_UNDEFINED;
1424 /* else fall into the slow case */
1427 # if defined(VGA_mips64) && defined(VGABI_N32)
1428 if (LIKELY(sizeof(void*) == 4 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1429 # else
1430 if (LIKELY(sizeof(void*) == 8 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1431 # endif
1433 SecMap* sm = get_secmap_for_reading(a);
1434 UWord sm_off = SM_OFF(a);
1435 UWord vabits8 = sm->vabits8[sm_off];
1436 if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1437 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1438 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1439 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1440 /* else fall into slow case */
1443 /* ------------ END semi-fast cases ------------ */
1445 ULong vbits64 = V_BITS64_UNDEFINED; /* result */
1446 ULong pessim64 = V_BITS64_DEFINED; /* only used when p-l-ok=yes */
1447 SSizeT szB = nBits / 8;
1448 SSizeT i; /* Must be signed. */
1449 SizeT n_addrs_bad = 0;
1450 Addr ai;
1451 UChar vbits8;
1452 Bool ok;
1454 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1456 /* Make up a 64-bit result V word, which contains the loaded data
1457 for valid addresses and Defined for invalid addresses. Iterate
1458 over the bytes in the word, from the most significant down to
1459 the least. The vbits to return are calculated into vbits64.
1460 Also compute the pessimising value to be used when
1461 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1462 info can be gleaned from pessim64) but is used as a
1463 cross-check. */
1464 for (i = szB-1; i >= 0; i--) {
1465 PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
1466 ai = a + byte_offset_w(szB, bigendian, i);
1467 ok = get_vbits8(ai, &vbits8);
1468 vbits64 <<= 8;
1469 vbits64 |= vbits8;
1470 if (!ok) n_addrs_bad++;
1471 pessim64 <<= 8;
1472 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1475 /* In the common case, all the addresses involved are valid, so we
1476 just return the computed V bits and have done. */
1477 if (LIKELY(n_addrs_bad == 0))
1478 return vbits64;
1480 /* If there's no possibility of getting a partial-loads-ok
1481 exemption, report the error and quit. */
1482 if (!MC_(clo_partial_loads_ok)) {
1483 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1484 return vbits64;
1487 /* The partial-loads-ok excemption might apply. Find out if it
1488 does. If so, don't report an addressing error, but do return
1489 Undefined for the bytes that are out of range, so as to avoid
1490 false negatives. If it doesn't apply, just report an addressing
1491 error in the usual way. */
1493 /* Some code steps along byte strings in aligned word-sized chunks
1494 even when there is only a partially defined word at the end (eg,
1495 optimised strlen). This is allowed by the memory model of
1496 modern machines, since an aligned load cannot span two pages and
1497 thus cannot "partially fault". Despite such behaviour being
1498 declared undefined by ANSI C/C++.
1500 Therefore, a load from a partially-addressible place is allowed
1501 if all of the following hold:
1502 - the command-line flag is set [by default, it isn't]
1503 - it's a word-sized, word-aligned load
1504 - at least one of the addresses in the word *is* valid
1506 Since this suppresses the addressing error, we avoid false
1507 negatives by marking bytes undefined when they come from an
1508 invalid address.
1511 /* "at least one of the addresses is invalid" */
1512 tl_assert(pessim64 != V_BITS64_DEFINED);
1514 # if defined(VGA_mips64) && defined(VGABI_N32)
1515 if (szB == VG_WORDSIZE * 2 && VG_IS_WORD_ALIGNED(a)
1516 && n_addrs_bad < VG_WORDSIZE * 2)
1517 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
1518 /* On power unaligned loads of words are OK. */
1519 if (szB == VG_WORDSIZE && n_addrs_bad < VG_WORDSIZE)
1520 # else
1521 if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1522 && n_addrs_bad < VG_WORDSIZE)
1523 # endif
1525 /* Exemption applies. Use the previously computed pessimising
1526 value for vbits64 and return the combined result, but don't
1527 flag an addressing error. The pessimising value is Defined
1528 for valid addresses and Undefined for invalid addresses. */
1529 /* for assumption that doing bitwise or implements UifU */
1530 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1531 /* (really need "UifU" here...)
1532 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1533 vbits64 |= pessim64;
1534 return vbits64;
1537 /* Also, in appears that gcc generates string-stepping code in
1538 32-bit chunks on 64 bit platforms. So, also grant an exception
1539 for this case. Note that the first clause of the conditional
1540 (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1541 will get folded out in 32 bit builds. */
1542 # if defined(VGA_mips64) && defined(VGABI_N32)
1543 if (VG_WORDSIZE == 4
1544 && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4)
1545 # else
1546 if (VG_WORDSIZE == 8
1547 && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4)
1548 # endif
1550 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1551 /* (really need "UifU" here...)
1552 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1553 vbits64 |= pessim64;
1554 /* Mark the upper 32 bits as undefined, just to be on the safe
1555 side. */
1556 vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1557 return vbits64;
1560 /* Exemption doesn't apply. Flag an addressing error in the normal
1561 way. */
1562 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1564 return vbits64;
1568 static
1569 __attribute__((noinline))
1570 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1572 SizeT szB = nBits / 8;
1573 SizeT i, n_addrs_bad = 0;
1574 UChar vbits8;
1575 Addr ai;
1576 Bool ok;
1578 PROF_EVENT(MCPE_STOREVN_SLOW);
1580 /* ------------ BEGIN semi-fast cases ------------ */
1581 /* These deal quickly-ish with the common auxiliary primary map
1582 cases on 64-bit platforms. Are merely a speedup hack; can be
1583 omitted without loss of correctness/functionality. Note that in
1584 both cases the "sizeof(void*) == 8" causes these cases to be
1585 folded out by compilers on 32-bit platforms. The logic below
1586 is somewhat similar to some cases extensively commented in
1587 MC_(helperc_STOREV8).
1589 # if defined(VGA_mips64) && defined(VGABI_N32)
1590 if (LIKELY(sizeof(void*) == 4 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1591 # else
1592 if (LIKELY(sizeof(void*) == 8 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1593 # endif
1595 SecMap* sm = get_secmap_for_reading(a);
1596 UWord sm_off16 = SM_OFF_16(a);
1597 UWord vabits16 = sm->vabits16[sm_off16];
1598 if (LIKELY( !is_distinguished_sm(sm) &&
1599 (VA_BITS16_DEFINED == vabits16 ||
1600 VA_BITS16_UNDEFINED == vabits16) )) {
1601 /* Handle common case quickly: a is suitably aligned, */
1602 /* is mapped, and is addressible. */
1603 // Convert full V-bits in register to compact 2-bit form.
1604 if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1605 sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
1606 return;
1607 } else if (V_BITS64_UNDEFINED == vbytes) {
1608 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
1609 return;
1611 /* else fall into the slow case */
1613 /* else fall into the slow case */
1616 # if defined(VGA_mips64) && defined(VGABI_N32)
1617 if (LIKELY(sizeof(void*) == 4 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1618 # else
1619 if (LIKELY(sizeof(void*) == 8 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1620 # endif
1622 SecMap* sm = get_secmap_for_reading(a);
1623 UWord sm_off = SM_OFF(a);
1624 UWord vabits8 = sm->vabits8[sm_off];
1625 if (LIKELY( !is_distinguished_sm(sm) &&
1626 (VA_BITS8_DEFINED == vabits8 ||
1627 VA_BITS8_UNDEFINED == vabits8) )) {
1628 /* Handle common case quickly: a is suitably aligned, */
1629 /* is mapped, and is addressible. */
1630 // Convert full V-bits in register to compact 2-bit form.
1631 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1632 sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1633 return;
1634 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1635 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1636 return;
1638 /* else fall into the slow case */
1640 /* else fall into the slow case */
1642 /* ------------ END semi-fast cases ------------ */
1644 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1646 /* Dump vbytes in memory, iterating from least to most significant
1647 byte. At the same time establish addressibility of the location. */
1648 for (i = 0; i < szB; i++) {
1649 PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
1650 ai = a + byte_offset_w(szB, bigendian, i);
1651 vbits8 = vbytes & 0xff;
1652 ok = set_vbits8(ai, vbits8);
1653 if (!ok) n_addrs_bad++;
1654 vbytes >>= 8;
1657 /* If an address error has happened, report it. */
1658 if (n_addrs_bad > 0)
1659 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1663 /*------------------------------------------------------------*/
1664 /*--- Setting permissions over address ranges. ---*/
1665 /*------------------------------------------------------------*/
1667 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1668 UWord dsm_num )
1670 UWord sm_off, sm_off16;
1671 UWord vabits2 = vabits16 & 0x3;
1672 SizeT lenA, lenB, len_to_next_secmap;
1673 Addr aNext;
1674 SecMap* sm;
1675 SecMap** sm_ptr;
1676 SecMap* example_dsm;
1678 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);
1680 /* Check the V+A bits make sense. */
1681 tl_assert(VA_BITS16_NOACCESS == vabits16 ||
1682 VA_BITS16_UNDEFINED == vabits16 ||
1683 VA_BITS16_DEFINED == vabits16);
1685 // This code should never write PDBs; ensure this. (See comment above
1686 // set_vabits2().)
1687 tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1689 if (lenT == 0)
1690 return;
1692 if (lenT > 256 * 1024 * 1024) {
1693 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1694 const HChar* s = "unknown???";
1695 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1696 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1697 if (vabits16 == VA_BITS16_DEFINED ) s = "defined";
1698 VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1699 "large range [0x%lx, 0x%lx) (%s)\n",
1700 a, a + lenT, s);
1704 #ifndef PERF_FAST_SARP
1705 /*------------------ debug-only case ------------------ */
1707 // Endianness doesn't matter here because all bytes are being set to
1708 // the same value.
1709 // Nb: We don't have to worry about updating the sec-V-bits table
1710 // after these set_vabits2() calls because this code never writes
1711 // VA_BITS2_PARTDEFINED values.
1712 SizeT i;
1713 for (i = 0; i < lenT; i++) {
1714 set_vabits2(a + i, vabits2);
1716 return;
1718 #endif
1720 /*------------------ standard handling ------------------ */
1722 /* Get the distinguished secondary that we might want
1723 to use (part of the space-compression scheme). */
1724 example_dsm = &sm_distinguished[dsm_num];
1726 // We have to handle ranges covering various combinations of partial and
1727 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1728 // Cases marked with a '*' are common.
1730 // TYPE PARTS USED
1731 // ---- ----------
1732 // * one partial sec-map (p) 1
1733 // - one whole sec-map (P) 2
1735 // * two partial sec-maps (pp) 1,3
1736 // - one partial, one whole sec-map (pP) 1,2
1737 // - one whole, one partial sec-map (Pp) 2,3
1738 // - two whole sec-maps (PP) 2,2
1740 // * one partial, one whole, one partial (pPp) 1,2,3
1741 // - one partial, two whole (pPP) 1,2,2
1742 // - two whole, one partial (PPp) 2,2,3
1743 // - three whole (PPP) 2,2,2
1745 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1746 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1747 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1748 // - N whole (PP...PP) 2,2...2,2
1750 // Break up total length (lenT) into two parts: length in the first
1751 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
1752 aNext = start_of_this_sm(a) + SM_SIZE;
1753 len_to_next_secmap = aNext - a;
1754 if ( lenT <= len_to_next_secmap ) {
1755 // Range entirely within one sec-map. Covers almost all cases.
1756 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
1757 lenA = lenT;
1758 lenB = 0;
1759 } else if (is_start_of_sm(a)) {
1760 // Range spans at least one whole sec-map, and starts at the beginning
1761 // of a sec-map; skip to Part 2.
1762 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
1763 lenA = 0;
1764 lenB = lenT;
1765 goto part2;
1766 } else {
1767 // Range spans two or more sec-maps, first one is partial.
1768 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
1769 lenA = len_to_next_secmap;
1770 lenB = lenT - lenA;
1773 //------------------------------------------------------------------------
1774 // Part 1: Deal with the first sec_map. Most of the time the range will be
1775 // entirely within a sec_map and this part alone will suffice. Also,
1776 // doing it this way lets us avoid repeatedly testing for the crossing of
1777 // a sec-map boundary within these loops.
1778 //------------------------------------------------------------------------
1780 // If it's distinguished, make it undistinguished if necessary.
1781 sm_ptr = get_secmap_ptr(a);
1782 if (is_distinguished_sm(*sm_ptr)) {
1783 if (*sm_ptr == example_dsm) {
1784 // Sec-map already has the V+A bits that we want, so skip.
1785 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
1786 a = aNext;
1787 lenA = 0;
1788 } else {
1789 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
1790 *sm_ptr = copy_for_writing(*sm_ptr);
1793 sm = *sm_ptr;
1795 // 1 byte steps
1796 while (True) {
1797 if (VG_IS_8_ALIGNED(a)) break;
1798 if (lenA < 1) break;
1799 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
1800 sm_off = SM_OFF(a);
1801 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1802 a += 1;
1803 lenA -= 1;
1805 // 8-aligned, 8 byte steps
1806 while (True) {
1807 if (lenA < 8) break;
1808 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
1809 sm_off16 = SM_OFF_16(a);
1810 sm->vabits16[sm_off16] = vabits16;
1811 a += 8;
1812 lenA -= 8;
1814 // 1 byte steps
1815 while (True) {
1816 if (lenA < 1) break;
1817 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
1818 sm_off = SM_OFF(a);
1819 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1820 a += 1;
1821 lenA -= 1;
1824 // We've finished the first sec-map. Is that it?
1825 if (lenB == 0)
1826 return;
1828 //------------------------------------------------------------------------
1829 // Part 2: Fast-set entire sec-maps at a time.
1830 //------------------------------------------------------------------------
1831 part2:
1832 // 64KB-aligned, 64KB steps.
1833 // Nb: we can reach here with lenB < SM_SIZE
1834 tl_assert(0 == lenA);
1835 while (True) {
1836 if (lenB < SM_SIZE) break;
1837 tl_assert(is_start_of_sm(a));
1838 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
1839 sm_ptr = get_secmap_ptr(a);
1840 if (!is_distinguished_sm(*sm_ptr)) {
1841 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
1842 // Free the non-distinguished sec-map that we're replacing. This
1843 // case happens moderately often, enough to be worthwhile.
1844 SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1845 tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1847 update_SM_counts(*sm_ptr, example_dsm);
1848 // Make the sec-map entry point to the example DSM
1849 *sm_ptr = example_dsm;
1850 lenB -= SM_SIZE;
1851 a += SM_SIZE;
1854 // We've finished the whole sec-maps. Is that it?
1855 if (lenB == 0)
1856 return;
1858 //------------------------------------------------------------------------
1859 // Part 3: Finish off the final partial sec-map, if necessary.
1860 //------------------------------------------------------------------------
1862 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1864 // If it's distinguished, make it undistinguished if necessary.
1865 sm_ptr = get_secmap_ptr(a);
1866 if (is_distinguished_sm(*sm_ptr)) {
1867 if (*sm_ptr == example_dsm) {
1868 // Sec-map already has the V+A bits that we want, so stop.
1869 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
1870 return;
1871 } else {
1872 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
1873 *sm_ptr = copy_for_writing(*sm_ptr);
1876 sm = *sm_ptr;
1878 // 8-aligned, 8 byte steps
1879 while (True) {
1880 if (lenB < 8) break;
1881 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
1882 sm_off16 = SM_OFF_16(a);
1883 sm->vabits16[sm_off16] = vabits16;
1884 a += 8;
1885 lenB -= 8;
1887 // 1 byte steps
1888 while (True) {
1889 if (lenB < 1) return;
1890 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
1891 sm_off = SM_OFF(a);
1892 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1893 a += 1;
1894 lenB -= 1;
1899 /* --- Set permissions for arbitrary address ranges --- */
1901 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1903 PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
1904 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1905 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1906 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1907 ocache_sarp_Clear_Origins ( a, len );
1910 static void make_mem_undefined ( Addr a, SizeT len )
1912 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
1913 DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1914 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1917 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1919 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
1920 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1921 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1922 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1923 ocache_sarp_Set_Origins ( a, len, otag );
1926 static
1927 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1928 ThreadId tid, UInt okind )
1930 UInt ecu;
1931 ExeContext* here;
1932 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1933 if it is invalid. So no need to do it here. */
1934 tl_assert(okind <= 3);
1935 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1936 tl_assert(here);
1937 ecu = VG_(get_ECU_from_ExeContext)(here);
1938 tl_assert(VG_(is_plausible_ECU)(ecu));
1939 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1942 static
1943 void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
1945 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1948 static
1949 void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
1951 MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1954 void MC_(make_mem_defined) ( Addr a, SizeT len )
1956 PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
1957 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1958 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1959 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1960 ocache_sarp_Clear_Origins ( a, len );
1963 __attribute__((unused))
1964 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
1966 MC_(make_mem_defined)(a, len);
1969 /* For each byte in [a,a+len), if the byte is addressable, make it be
1970 defined, but if it isn't addressible, leave it alone. In other
1971 words a version of MC_(make_mem_defined) that doesn't mess with
1972 addressibility. Low-performance implementation. */
1973 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1975 SizeT i;
1976 UChar vabits2;
1977 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1978 for (i = 0; i < len; i++) {
1979 vabits2 = get_vabits2( a+i );
1980 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1981 set_vabits2(a+i, VA_BITS2_DEFINED);
1982 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1983 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1989 /* Similarly (needed for mprotect handling ..) */
1990 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1992 SizeT i;
1993 UChar vabits2;
1994 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1995 for (i = 0; i < len; i++) {
1996 vabits2 = get_vabits2( a+i );
1997 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1998 set_vabits2(a+i, VA_BITS2_DEFINED);
1999 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
2000 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
2006 /* --- Block-copy permissions (needed for implementing realloc() and
2007 sys_mremap). --- */
2009 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
2011 SizeT i, j;
2012 UChar vabits2, vabits8;
2013 Bool aligned, nooverlap;
2015 DEBUG("MC_(copy_address_range_state)\n");
2016 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);
2018 if (len == 0 || src == dst)
2019 return;
2021 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
2022 nooverlap = src+len <= dst || dst+len <= src;
2024 if (nooverlap && aligned) {
2026 /* Vectorised fast case, when no overlap and suitably aligned */
2027 /* vector loop */
2028 i = 0;
2029 while (len >= 4) {
2030 vabits8 = get_vabits8_for_aligned_word32( src+i );
2031 set_vabits8_for_aligned_word32( dst+i, vabits8 );
2032 if (LIKELY(VA_BITS8_DEFINED == vabits8
2033 || VA_BITS8_UNDEFINED == vabits8
2034 || VA_BITS8_NOACCESS == vabits8)) {
2035 /* do nothing */
2036 } else {
2037 /* have to copy secondary map info */
2038 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
2039 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
2040 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
2041 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
2042 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
2043 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
2044 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
2045 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
2047 i += 4;
2048 len -= 4;
2050 /* fixup loop */
2051 while (len >= 1) {
2052 vabits2 = get_vabits2( src+i );
2053 set_vabits2( dst+i, vabits2 );
2054 if (VA_BITS2_PARTDEFINED == vabits2) {
2055 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
2057 i++;
2058 len--;
2061 } else {
2063 /* We have to do things the slow way */
2064 if (src < dst) {
2065 for (i = 0, j = len-1; i < len; i++, j--) {
2066 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
2067 vabits2 = get_vabits2( src+j );
2068 set_vabits2( dst+j, vabits2 );
2069 if (VA_BITS2_PARTDEFINED == vabits2) {
2070 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
2075 if (src > dst) {
2076 for (i = 0; i < len; i++) {
2077 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
2078 vabits2 = get_vabits2( src+i );
2079 set_vabits2( dst+i, vabits2 );
2080 if (VA_BITS2_PARTDEFINED == vabits2) {
2081 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
2090 /*------------------------------------------------------------*/
2091 /*--- Origin tracking stuff - cache basics ---*/
2092 /*------------------------------------------------------------*/
2094 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
2095 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2097 Note that this implementation draws inspiration from the "origin
2098 tracking by value piggybacking" scheme described in "Tracking Bad
2099 Apples: Reporting the Origin of Null and Undefined Value Errors"
2100 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
2101 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
2102 implemented completely differently.
2104 Origin tags and ECUs -- about the shadow values
2105 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2107 This implementation tracks the defining point of all uninitialised
2108 values using so called "origin tags", which are 32-bit integers,
2109 rather than using the values themselves to encode the origins. The
2110 latter, so-called "value piggybacking", is what the OOPSLA07 paper
2111 describes.
2113 Origin tags, as tracked by the machinery below, are 32-bit unsigned
2114 ints (UInts), regardless of the machine's word size. Each tag
2115 comprises an upper 30-bit ECU field and a lower 2-bit
2116 'kind' field. The ECU field is a number given out by m_execontext
2117 and has a 1-1 mapping with ExeContext*s. An ECU can be used
2118 directly as an origin tag (otag), but in fact we want to put
2119 additional information in the 'kind' field to indicate roughly where the
2120 tag came from. This helps print more understandable error messages
2121 for the user -- it has no other purpose. In summary:
2123 * Both ECUs and origin tags are represented as 32-bit words
2125 * m_execontext and the core-tool interface deal purely in ECUs.
2126 They have no knowledge of origin tags - that is a purely
2127 Memcheck-internal matter.
2129 * all valid ECUs have the lowest 2 bits zero and at least
2130 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2132 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2133 constants defined in mc_include.h.
2135 * to convert an otag back to an ECU, AND it with ~3
2137 One important fact is that no valid otag is zero. A zero otag is
2138 used by the implementation to indicate "no origin", which could
2139 mean that either the value is defined, or it is undefined but the
2140 implementation somehow managed to lose the origin.
2142 The ECU used for memory created by malloc etc is derived from the
2143 stack trace at the time the malloc etc happens. This means the
2144 mechanism can show the exact allocation point for heap-created
2145 uninitialised values.
2147 In contrast, it is simply too expensive to create a complete
2148 backtrace for each stack allocation. Therefore we merely use a
2149 depth-1 backtrace for stack allocations, which can be done once at
2150 translation time, rather than N times at run time. The result of
2151 this is that, for stack created uninitialised values, Memcheck can
2152 only show the allocating function, and not what called it.
2153 Furthermore, compilers tend to move the stack pointer just once at
2154 the start of the function, to allocate all locals, and so in fact
2155 the stack origin almost always simply points to the opening brace
2156 of the function. Net result is, for stack origins, the mechanism
2157 can tell you in which function the undefined value was created, but
2158 that's all. Users will need to carefully check all locals in the
2159 specified function.
2161 Shadowing registers and memory
2162 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2164 Memory is shadowed using a two level cache structure (ocacheL1 and
2165 ocacheL2). Memory references are first directed to ocacheL1. This
2166 is a traditional 2-way set associative cache with 32-byte lines and
2167 approximate LRU replacement within each set.
2169 A naive implementation would require storing one 32 bit otag for
2170 each byte of memory covered, a 4:1 space overhead. Instead, there
2171 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2172 that shows which of the 4 bytes have that shadow value and which
2173 have a shadow value of zero (indicating no origin). Hence a lot of
2174 space is saved, but the cost is that only one different origin per
2175 4 bytes of address space can be represented. This is a source of
2176 imprecision, but how much of a problem it really is remains to be
2177 seen.
2179 A cache line that contains all zeroes ("no origins") contains no
2180 useful information, and can be ejected from the L1 cache "for
2181 free", in the sense that a read miss on the L1 causes a line of
2182 zeroes to be installed. However, ejecting a line containing
2183 nonzeroes risks losing origin information permanently. In order to
2184 prevent such lossage, ejected nonzero lines are placed in a
2185 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2186 lines. This can grow arbitrarily large, and so should ensure that
2187 Memcheck runs out of memory in preference to losing useful origin
2188 info due to cache size limitations.
2190 Shadowing registers is a bit tricky, because the shadow values are
2191 32 bits, regardless of the size of the register. That gives a
2192 problem for registers smaller than 32 bits. The solution is to
2193 find spaces in the guest state that are unused, and use those to
2194 shadow guest state fragments smaller than 32 bits. For example, on
2195 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
2196 shadow are allocated for the register's otag, then there are still
2197 12 bytes left over which could be used to shadow 3 other values.
2199 This implies there is some non-obvious mapping from guest state
2200 (start,length) pairs to the relevant shadow offset (for the origin
2201 tags). And it is unfortunately guest-architecture specific. The
2202 mapping is contained in mc_machine.c, which is quite lengthy but
2203 straightforward.
2205 Instrumenting the IR
2206 ~~~~~~~~~~~~~~~~~~~~
2208 Instrumentation is largely straightforward, and done by the
2209 functions schemeE and schemeS in mc_translate.c. These generate
2210 code for handling the origin tags of expressions (E) and statements
2211 (S) respectively. The rather strange names are a reference to the
2212 "compilation schemes" shown in Simon Peyton Jones' book "The
2213 Implementation of Functional Programming Languages" (Prentice Hall,
2214 1987, see
2215 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2217 schemeS merely arranges to move shadow values around the guest
2218 state to track the incoming IR. schemeE is largely trivial too.
2219 The only significant point is how to compute the otag corresponding
2220 to binary (or ternary, quaternary, etc) operator applications. The
2221 rule is simple: just take whichever value is larger (32-bit
2222 unsigned max). Constants get the special value zero. Hence this
2223 rule always propagates a nonzero (known) otag in preference to a
2224 zero (unknown, or more likely, value-is-defined) tag, as we want.
2225 If two different undefined values are inputs to a binary operator
2226 application, then which is propagated is arbitrary, but that
2227 doesn't matter, since the program is erroneous in using either of
2228 the values, and so there's no point in attempting to propagate
2229 both.
2231 Since constants are abstracted to (otag) zero, much of the
2232 instrumentation code can be folded out without difficulty by the
2233 generic post-instrumentation IR cleanup pass, using these rules:
2234 Max32U(0,x) -> x, Max32U(x,0) -> x, Max32U(x,y) where x and y are
2235 constants is evaluated at JIT time. And the resulting dead code
2236 removal. In practice this causes surprisingly few Max32Us to
2237 survive through to backend code generation.
2239 Integration with the V-bits machinery
2240 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2242 This is again largely straightforward. Mostly the otag and V bits
2243 stuff are independent. The only point of interaction is when the V
2244 bits instrumenter creates a call to a helper function to report an
2245 uninitialised value error -- in that case it must first use schemeE
2246 to get hold of the origin tag expression for the value, and pass
2247 that to the helper too.
2249 There is the usual stuff to do with setting address range
2250 permissions. When memory is painted undefined, we must also know
2251 the origin tag to paint with, which involves some tedious plumbing,
2252 particularly to do with the fast case stack handlers. When memory
2253 is painted defined or noaccess then the origin tags must be forced
2254 to zero.
2256 One of the goals of the implementation was to ensure that the
2257 non-origin tracking mode isn't slowed down at all. To do this,
2258 various functions to do with memory permissions setting (again,
2259 mostly pertaining to the stack) are duplicated for the with- and
2260 without-otag case.
2262 Dealing with stack redzones, and the NIA cache
2263 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2265 This is one of the few non-obvious parts of the implementation.
2267 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2268 reserved area below the stack pointer, that can be used as scratch
2269 space by compiler generated code for functions. In the Memcheck
2270 sources this is referred to as the "stack redzone". The important
2271 thing here is that such redzones are considered volatile across
2272 function calls and returns. So Memcheck takes care to mark them as
2273 undefined for each call and return, on the afflicted platforms.
2274 Past experience shows this is essential in order to get reliable
2275 messages about uninitialised values that come from the stack.
2277 So the question is, when we paint a redzone undefined, what origin
2278 tag should we use for it? Consider a function f() calling g(). If
2279 we paint the redzone using an otag derived from the ExeContext of
2280 the CALL/BL instruction in f, then any errors in g causing it to
2281 use uninitialised values that happen to lie in the redzone, will be
2282 reported as having their origin in f. Which is highly confusing.
2284 The same applies for returns: if, on a return, we paint the redzone
2285 using an origin tag derived from the ExeContext of the RET/BLR
2286 instruction in g, then any later errors in f causing it to use
2287 uninitialised values in the redzone, will be reported as having
2288 their origin in g. Which is just as confusing.
2290 To do it right, in both cases we need to use an origin tag which
2291 pertains to the instruction which dynamically follows the CALL/BL
2292 or RET/BLR. In short, one derived from the NIA - the "next
2293 instruction address".
2295 To make this work, Memcheck's redzone-painting helper,
2296 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2297 NIA. It converts the NIA to a 1-element ExeContext, and uses that
2298 ExeContext's ECU as the basis for the otag used to paint the
2299 redzone. The expensive part of this is converting an NIA into an
2300 ECU, since this happens once for every call and every return. So
2301 we use a simple 511-line, 2-way set associative cache
2302 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2303 the cost out.
2305 Further background comments
2306 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2308 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
2309 > it really just the address of the relevant ExeContext?
2311 Well, it's not the address, but a value which has a 1-1 mapping
2312 with ExeContexts, and is guaranteed not to be zero, since zero
2313 denotes (to memcheck) "unknown origin or defined value". So these
2314 UInts are just numbers starting at 4 and incrementing by 4; each
2315 ExeContext is given a number when it is created. (*** NOTE this
2316 confuses otags and ECUs; see comments above ***).
2318 Making these otags 32-bit regardless of the machine's word size
2319 makes the 64-bit implementation easier (next para). And it doesn't
2320 really limit us in any way, since for the tags to overflow would
2321 require that the program somehow caused 2^30-1 different
2322 ExeContexts to be created, in which case it is probably in deep
2323 trouble. Not to mention V will have soaked up many tens of
2324 gigabytes of memory merely to store them all.
2326 So having 64-bit origins doesn't really buy you anything, and has
2327 the following downsides:
2329 Suppose that instead, an otag is a UWord. This would mean that, on
2330 a 64-bit target,
2332 1. It becomes hard to shadow any element of guest state which is
2333 smaller than 8 bytes. To do so means you'd need to find some
2334 8-byte-sized hole in the guest state which you don't want to
2335 shadow, and use that instead to hold the otag. On ppc64, the
2336 condition code register(s) are split into 20 UChar sized pieces,
2337 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2338 and so that would entail finding 160 bytes somewhere else in the
2339 guest state.
2341 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2342 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2343 same) and so I had to look for 4 untracked otag-sized areas in
2344 the guest state to make that possible.
2346 The same problem exists of course when origin tags are only 32
2347 bits, but it's less extreme.
2349 2. (More compelling) it doubles the size of the origin shadow
2350 memory. Given that the shadow memory is organised as a fixed
2351 size cache, and that accuracy of tracking is limited by origins
2352 falling out the cache due to space conflicts, this isn't good.
2354 > Another question: is the origin tracking perfect, or are there
2355 > cases where it fails to determine an origin?
2357 It is imperfect for at least the following reasons, and
2358 probably more:
2360 * Insufficient capacity in the origin cache. When a line is
2361 evicted from the cache it is gone forever, and so subsequent
2362 queries for the line produce zero, indicating no origin
2363 information. Interestingly, a line containing all zeroes can be
2364 evicted "free" from the cache, since it contains no useful
2365 information, so there is scope perhaps for some cleverer cache
2366 management schemes. (*** NOTE, with the introduction of the
2367 second level origin tag cache, ocacheL2, this is no longer a
2368 problem. ***)
2370 * The origin cache only stores one otag per 32-bits of address
2371 space, plus 4 bits indicating which of the 4 bytes has that tag
2372 and which are considered defined. The result is that if two
2373 undefined bytes in the same word are stored in memory, the first
2374 stored byte's origin will be lost and replaced by the origin for
2375 the second byte.
2377 * Nonzero origin tags for defined values. Consider a binary
2378 operator application op(x,y). Suppose y is undefined (and so has
2379 a valid nonzero origin tag), and x is defined, but erroneously
2380 has a nonzero origin tag (defined values should have tag zero).
2381 If the erroneous tag has a numeric value greater than y's tag,
2382 then the rule for propagating origin tags through binary
2383 operations, which is simply to take the unsigned max of the two
2384 tags, will erroneously propagate x's tag rather than y's.
2386 * Some obscure uses of x86/amd64 byte registers can cause lossage
2387 or confusion of origins. %AH .. %DH are treated as different
2388 from, and unrelated to, their parent registers, %EAX .. %EDX.
2389 So some weird sequences like
2391 movb undefined-value, %AH
2392 movb defined-value, %AL
2393 .. use %AX or %EAX ..
2395 will cause the origin attributed to %AH to be ignored, since %AL,
2396 %AX, %EAX are treated as the same register, and %AH as a
2397 completely separate one.
2399 But having said all that, it actually seems to work fairly well in
2400 practice.
2403 static UWord stats_ocacheL1_find = 0;
2404 static UWord stats_ocacheL1_found_at_1 = 0;
2405 static UWord stats_ocacheL1_found_at_N = 0;
2406 static UWord stats_ocacheL1_misses = 0;
2407 static UWord stats_ocacheL1_lossage = 0;
2408 static UWord stats_ocacheL1_movefwds = 0;
2410 static UWord stats__ocacheL2_finds = 0;
2411 static UWord stats__ocacheL2_adds = 0;
2412 static UWord stats__ocacheL2_dels = 0;
2413 static UWord stats__ocacheL2_misses = 0;
2414 static UWord stats__ocacheL2_n_nodes_max = 0;
/* Cache of 32-bit values, one every 32 bits of address space */

/* log2 of the number of bytes of address space covered by one line. */
#define OC_BITS_PER_LINE 5
/* Number of 32-bit words per line: line size in bytes divided by 4. */
#define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2421 static INLINE UWord oc_line_offset ( Addr a ) {
2422 return (a >> 2) & (OC_W32S_PER_LINE - 1);
2424 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2425 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2428 #define OC_LINES_PER_SET 2
2430 #define OC_N_SET_BITS 20
2431 #define OC_N_SETS (1 << OC_N_SET_BITS)
2433 /* These settings give:
2434 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2435 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
2438 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2441 /* Originally (pre Dec 2021) it was the case that this code had a
2442 parameterizable cache line size, set by changing OC_BITS_PER_LINE.
2443 However, as a result of the speedup fixes necessitated by bug 446103, that
2444 is no longer really the case, and much of the L1 and L2 cache code has been
2445 tuned specifically for the case OC_BITS_PER_LINE == 5 (that is, the line
2446 size is 32 bytes). Changing that would require a bunch of re-tuning
2447 effort. So let's set it in stone for now. */
2448 STATIC_ASSERT(OC_BITS_PER_LINE == 5);
2449 STATIC_ASSERT(OC_LINES_PER_SET == 2);
2451 /* Fundamentally we want an OCacheLine structure (see below) as follows:
2452 struct {
2453 Addr tag;
2454 UInt w32 [OC_W32S_PER_LINE];
2455 UChar descr[OC_W32S_PER_LINE];
2457 However, in various places, we want to set the w32[] and descr[] arrays to
2458 zero, or check if they are zero. This can be a very hot path (per bug
2459 446103). So, instead, we have a union which is either those two arrays
2460 (OCacheLine_Main) or simply an array of ULongs (OCacheLine_W64s). For the
2461 set-zero/test-zero operations, the OCacheLine_W64s are used.
2464 // To ensure that OCacheLine.descr[] will fit in an integral number of ULongs.
2465 STATIC_ASSERT(0 == (OC_W32S_PER_LINE % 8));
2467 #define OC_W64S_PER_MAIN /* "MAIN" meaning "struct OCacheLine_Main" */ \
2468 (OC_W32S_PER_LINE / 2 /* covers OCacheLine_Main.w32[] */ \
2469 + OC_W32S_PER_LINE / 8) /* covers OCacheLine_Main.descr[] */
2470 STATIC_ASSERT(OC_W64S_PER_MAIN == 5);
2472 typedef
2473 ULong OCacheLine_W64s[OC_W64S_PER_MAIN];
2475 typedef
2476 struct {
2477 UInt w32 [OC_W32S_PER_LINE];
2478 UChar descr[OC_W32S_PER_LINE];
2480 OCacheLine_Main;
2482 STATIC_ASSERT(sizeof(OCacheLine_W64s) == sizeof(OCacheLine_Main));
2484 typedef
2485 struct {
2486 Addr tag;
2487 union {
2488 OCacheLine_W64s w64s;
2489 OCacheLine_Main main;
2490 } u;
2492 OCacheLine;
2494 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2495 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2496 and 'z' if all the represented tags are zero. */
2497 static inline UChar classify_OCacheLine ( OCacheLine* line )
2499 UWord i;
2500 if (line->tag == 1/*invalid*/)
2501 return 'e'; /* EMPTY */
2502 tl_assert(is_valid_oc_tag(line->tag));
2504 // BEGIN fast special-case of the test loop below. This will detect
2505 // zero-ness (case 'z') for a subset of cases that the loop below will,
2506 // hence is safe.
2507 if (OC_W64S_PER_MAIN == 5) {
2508 if (line->u.w64s[0] == 0
2509 && line->u.w64s[1] == 0 && line->u.w64s[2] == 0
2510 && line->u.w64s[3] == 0 && line->u.w64s[4] == 0) {
2511 return 'z';
2513 } else {
2514 tl_assert2(0, "unsupported line size (classify_OCacheLine)");
2516 // END fast special-case of the test loop below.
2518 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2519 tl_assert(0 == ((~0xF) & line->u.main.descr[i]));
2520 if (line->u.main.w32[i] > 0 && line->u.main.descr[i] > 0)
2521 return 'n'; /* NONZERO - contains useful info */
2523 return 'z'; /* ZERO - no useful info */
2526 typedef
2527 struct {
2528 OCacheLine line[OC_LINES_PER_SET];
2530 OCacheSet;
2532 typedef
2533 struct {
2534 OCacheSet set[OC_N_SETS];
2536 OCache;
2538 static OCache* ocacheL1 = NULL;
2539 static UWord ocacheL1_event_ctr = 0;
2541 static void init_ocacheL2 ( void ); /* fwds */
2542 static void init_OCache ( void )
2544 UWord line, set;
2545 tl_assert(MC_(clo_mc_level) >= 3);
2546 tl_assert(ocacheL1 == NULL);
2547 SysRes sres = VG_(am_shadow_alloc)(sizeof(OCache));
2548 if (sr_isError(sres)) {
2549 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2550 sizeof(OCache), sr_Err(sres) );
2552 ocacheL1 = (void *)(Addr)sr_Res(sres);
2553 tl_assert(ocacheL1 != NULL);
2554 for (set = 0; set < OC_N_SETS; set++) {
2555 for (line = 0; line < OC_LINES_PER_SET; line++) {
2556 ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2559 init_ocacheL2();
2562 static inline void moveLineForwards ( OCacheSet* set, UWord lineno )
2564 OCacheLine tmp;
2565 stats_ocacheL1_movefwds++;
2566 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2567 tmp = set->line[lineno-1];
2568 set->line[lineno-1] = set->line[lineno];
2569 set->line[lineno] = tmp;
2572 static inline void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2573 UWord i;
2574 if (OC_W32S_PER_LINE == 8) {
2575 // BEGIN fast special-case of the loop below
2576 tl_assert(OC_W64S_PER_MAIN == 5);
2577 line->u.w64s[0] = 0;
2578 line->u.w64s[1] = 0;
2579 line->u.w64s[2] = 0;
2580 line->u.w64s[3] = 0;
2581 line->u.w64s[4] = 0;
2582 // END fast special-case of the loop below
2583 } else {
2584 tl_assert2(0, "unsupported line size (zeroise_OCacheLine)");
2585 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2586 line->u.main.w32[i] = 0; /* NO ORIGIN */
2587 line->u.main.descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2590 line->tag = tag;
2593 //////////////////////////////////////////////////////////////
2594 //// OCache backing store
2596 // The backing store for ocacheL1 is, conceptually, an AVL tree of lines that
2597 // got ejected from the L1 (a "victim cache"), and which actually contain
2598 // useful info -- that is, for which classify_OCacheLine would return 'n' and
2599 // no other value. However, the tree can grow large, and searching/updating
2600 // it can be hot paths. Hence we "take out" 12 significant bits of the key by
2601 // having 4096 trees, and select one using HASH_OCACHE_TAG.
2603 // What that hash function returns isn't important so long as it is a pure
2604 // function of the tag values, and is < 4096. However, it is critical for
2605 // performance of long SARPs. Hence the extra shift of 11 bits. This means
2606 // each tree conceptually is assigned to contiguous sequences of 2048 lines in
2607 // the "line address space", giving some locality of reference when scanning
2608 // linearly through address space, as is done by a SARP. Changing that 11 to
2609 // 0 gives terrible performance on long SARPs, presumably because each new
2610 // line is in a different tree, hence we wind up thrashing the (CPU's) caches.
2612 // On 32-bit targets, we have to be a bit careful not to shift out so many
2613 // bits that not all 2^12 trees get used. That leads to the constraint
2614 // (OC_BITS_PER_LINE + 11 + 12) < 32. Note that the 11 is the only thing we
2615 // can change here. In this case we have OC_BITS_PER_LINE == 5, hence the
2616 // inequality is (28 < 32) and so we're good.
2618 // The value 11 was determined empirically from various Firefox runs. 10 or
2619 // 12 also work pretty well.
2621 static OSet* ocachesL2[4096];
2623 STATIC_ASSERT((OC_BITS_PER_LINE + 11 + 12) < 32);
2624 static inline UInt HASH_OCACHE_TAG ( Addr tag ) {
2625 return (UInt)((tag >> (OC_BITS_PER_LINE + 11)) & 0xFFF);
2628 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2629 return VG_(malloc)(cc, szB);
2631 static void ocacheL2_free ( void* v ) {
2632 VG_(free)( v );
2635 /* Stats: # nodes currently in tree */
2636 static UWord stats__ocacheL2_n_nodes = 0;
2638 static void init_ocacheL2 ( void )
2640 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2641 tl_assert(0 == offsetof(OCacheLine,tag));
2642 for (UInt i = 0; i < 4096; i++) {
2643 tl_assert(!ocachesL2[i]);
2644 ocachesL2[i]
2645 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2646 NULL, /* fast cmp */
2647 ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2649 stats__ocacheL2_n_nodes = 0;
2652 /* Find line with the given tag in the tree, or NULL if not found. */
2653 static inline OCacheLine* ocacheL2_find_tag ( Addr tag )
2655 OCacheLine* line;
2656 tl_assert(is_valid_oc_tag(tag));
2657 stats__ocacheL2_finds++;
2658 OSet* oset = ocachesL2[HASH_OCACHE_TAG(tag)];
2659 line = VG_(OSetGen_Lookup)( oset, &tag );
2660 return line;
2663 /* Delete the line with the given tag from the tree, if it is present, and
2664 free up the associated memory. */
2665 static void ocacheL2_del_tag ( Addr tag )
2667 OCacheLine* line;
2668 tl_assert(is_valid_oc_tag(tag));
2669 stats__ocacheL2_dels++;
2670 OSet* oset = ocachesL2[HASH_OCACHE_TAG(tag)];
2671 line = VG_(OSetGen_Remove)( oset, &tag );
2672 if (line) {
2673 VG_(OSetGen_FreeNode)(oset, line);
2674 tl_assert(stats__ocacheL2_n_nodes > 0);
2675 stats__ocacheL2_n_nodes--;
2679 /* Add a copy of the given line to the tree. It must not already be
2680 present. */
2681 static void ocacheL2_add_line ( OCacheLine* line )
2683 OCacheLine* copy;
2684 tl_assert(is_valid_oc_tag(line->tag));
2685 OSet* oset = ocachesL2[HASH_OCACHE_TAG(line->tag)];
2686 copy = VG_(OSetGen_AllocNode)( oset, sizeof(OCacheLine) );
2687 *copy = *line;
2688 stats__ocacheL2_adds++;
2689 VG_(OSetGen_Insert)( oset, copy );
2690 stats__ocacheL2_n_nodes++;
2691 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2692 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2695 ////
2696 //////////////////////////////////////////////////////////////
2698 __attribute__((noinline))
2699 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2701 OCacheLine *victim, *inL2;
2702 UChar c;
2703 UWord line;
2704 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2705 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2706 UWord tag = a & tagmask;
2707 tl_assert(setno >= 0 && setno < OC_N_SETS);
2709 /* we already tried line == 0; skip therefore. */
2710 for (line = 1; line < OC_LINES_PER_SET; line++) {
2711 if (ocacheL1->set[setno].line[line].tag == tag) {
2712 if (line == 1) {
2713 stats_ocacheL1_found_at_1++;
2714 } else {
2715 stats_ocacheL1_found_at_N++;
2717 if (UNLIKELY(0 == (ocacheL1_event_ctr++
2718 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2719 moveLineForwards( &ocacheL1->set[setno], line );
2720 line--;
2722 return &ocacheL1->set[setno].line[line];
2726 /* A miss. Use the last slot. Implicitly this means we're
2727 ejecting the line in the last slot. */
2728 stats_ocacheL1_misses++;
2729 tl_assert(line == OC_LINES_PER_SET);
2730 line--;
2731 tl_assert(line > 0);
2733 /* First, move the to-be-ejected line to the L2 cache. */
2734 victim = &ocacheL1->set[setno].line[line];
2735 c = classify_OCacheLine(victim);
2736 switch (c) {
2737 case 'e':
2738 /* the line is empty (has invalid tag); ignore it. */
2739 break;
2740 case 'z':
2741 /* line contains zeroes. We must ensure the backing store is
2742 updated accordingly, either by copying the line there
2743 verbatim, or by ensuring it isn't present there. We
2744 choose the latter on the basis that it reduces the size of
2745 the backing store. */
2746 ocacheL2_del_tag( victim->tag );
2747 break;
2748 case 'n':
2749 /* line contains at least one real, useful origin. Copy it
2750 to the backing store. */
2751 stats_ocacheL1_lossage++;
2752 inL2 = ocacheL2_find_tag( victim->tag );
2753 if (inL2) {
2754 *inL2 = *victim;
2755 } else {
2756 ocacheL2_add_line( victim );
2758 break;
2759 default:
2760 tl_assert(0);
2763 /* Now we must reload the L1 cache from the backing tree, if
2764 possible. */
2765 tl_assert(tag != victim->tag); /* stay sane */
2766 inL2 = ocacheL2_find_tag( tag );
2767 if (inL2) {
2768 /* We're in luck. It's in the L2. */
2769 ocacheL1->set[setno].line[line] = *inL2;
2770 } else {
2771 /* Missed at both levels of the cache hierarchy. We have to
2772 declare it as full of zeroes (unknown origins). */
2773 stats__ocacheL2_misses++;
2774 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2777 /* Move it one forwards */
2778 moveLineForwards( &ocacheL1->set[setno], line );
2779 line--;
2781 return &ocacheL1->set[setno].line[line];
2784 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2786 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2787 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2788 UWord tag = a & tagmask;
2790 stats_ocacheL1_find++;
2792 if (OC_ENABLE_ASSERTIONS) {
2793 tl_assert(setno >= 0 && setno < OC_N_SETS);
2794 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2797 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2798 return &ocacheL1->set[setno].line[0];
2801 return find_OCacheLine_SLOW( a );
2804 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2806 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2807 //// Set the origins for a+0 .. a+7
2808 { OCacheLine* line;
2809 UWord lineoff = oc_line_offset(a);
2810 if (OC_ENABLE_ASSERTIONS) {
2811 tl_assert(lineoff >= 0
2812 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2814 line = find_OCacheLine( a );
2815 line->u.main.descr[lineoff+0] = 0xF;
2816 line->u.main.descr[lineoff+1] = 0xF;
2817 line->u.main.w32[lineoff+0] = otag;
2818 line->u.main.w32[lineoff+1] = otag;
2820 //// END inlined, specialised version of MC_(helperc_b_store8)
2824 /*------------------------------------------------------------*/
2825 /*--- Aligned fast case permission setters, ---*/
2826 /*--- for dealing with stacks ---*/
2827 /*------------------------------------------------------------*/
2829 /*--------------------- 32-bit ---------------------*/
2831 /* Nb: by "aligned" here we mean 4-byte aligned */
2833 static INLINE void make_aligned_word32_undefined ( Addr a )
2835 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);
2837 #ifndef PERF_FAST_STACK2
2838 make_mem_undefined(a, 4);
2839 #else
2841 UWord sm_off;
2842 SecMap* sm;
2844 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2845 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
2846 make_mem_undefined(a, 4);
2847 return;
2850 sm = get_secmap_for_writing_low(a);
2851 sm_off = SM_OFF(a);
2852 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2854 #endif
2857 static INLINE
2858 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2860 make_aligned_word32_undefined(a);
2861 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2862 //// Set the origins for a+0 .. a+3
2863 { OCacheLine* line;
2864 UWord lineoff = oc_line_offset(a);
2865 if (OC_ENABLE_ASSERTIONS) {
2866 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2868 line = find_OCacheLine( a );
2869 line->u.main.descr[lineoff] = 0xF;
2870 line->u.main.w32[lineoff] = otag;
2872 //// END inlined, specialised version of MC_(helperc_b_store4)
2875 static INLINE
2876 void make_aligned_word32_noaccess ( Addr a )
2878 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);
2880 #ifndef PERF_FAST_STACK2
2881 MC_(make_mem_noaccess)(a, 4);
2882 #else
2884 UWord sm_off;
2885 SecMap* sm;
2887 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2888 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
2889 MC_(make_mem_noaccess)(a, 4);
2890 return;
2893 sm = get_secmap_for_writing_low(a);
2894 sm_off = SM_OFF(a);
2895 sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2897 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2898 //// Set the origins for a+0 .. a+3.
2899 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2900 OCacheLine* line;
2901 UWord lineoff = oc_line_offset(a);
2902 if (OC_ENABLE_ASSERTIONS) {
2903 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2905 line = find_OCacheLine( a );
2906 line->u.main.descr[lineoff] = 0;
2908 //// END inlined, specialised version of MC_(helperc_b_store4)
2910 #endif
2913 /*--------------------- 64-bit ---------------------*/
2915 /* Nb: by "aligned" here we mean 8-byte aligned */
2917 static INLINE void make_aligned_word64_undefined ( Addr a )
2919 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);
2921 #ifndef PERF_FAST_STACK2
2922 make_mem_undefined(a, 8);
2923 #else
2925 UWord sm_off16;
2926 SecMap* sm;
2928 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2929 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
2930 make_mem_undefined(a, 8);
2931 return;
2934 sm = get_secmap_for_writing_low(a);
2935 sm_off16 = SM_OFF_16(a);
2936 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
2938 #endif
2941 static INLINE
2942 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2944 make_aligned_word64_undefined(a);
2945 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2946 //// Set the origins for a+0 .. a+7
2947 { OCacheLine* line;
2948 UWord lineoff = oc_line_offset(a);
2949 tl_assert(lineoff >= 0
2950 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2951 line = find_OCacheLine( a );
2952 line->u.main.descr[lineoff+0] = 0xF;
2953 line->u.main.descr[lineoff+1] = 0xF;
2954 line->u.main.w32[lineoff+0] = otag;
2955 line->u.main.w32[lineoff+1] = otag;
2957 //// END inlined, specialised version of MC_(helperc_b_store8)
2960 static INLINE
2961 void make_aligned_word64_noaccess ( Addr a )
2963 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);
2965 #ifndef PERF_FAST_STACK2
2966 MC_(make_mem_noaccess)(a, 8);
2967 #else
2969 UWord sm_off16;
2970 SecMap* sm;
2972 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2973 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
2974 MC_(make_mem_noaccess)(a, 8);
2975 return;
2978 sm = get_secmap_for_writing_low(a);
2979 sm_off16 = SM_OFF_16(a);
2980 sm->vabits16[sm_off16] = VA_BITS16_NOACCESS;
2982 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2983 //// Clear the origins for a+0 .. a+7.
2984 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2985 OCacheLine* line;
2986 UWord lineoff = oc_line_offset(a);
2987 tl_assert(lineoff >= 0
2988 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2989 line = find_OCacheLine( a );
2990 line->u.main.descr[lineoff+0] = 0;
2991 line->u.main.descr[lineoff+1] = 0;
2993 //// END inlined, specialised version of MC_(helperc_b_store8)
2995 #endif
2999 /*------------------------------------------------------------*/
3000 /*--- Stack pointer adjustment ---*/
3001 /*------------------------------------------------------------*/
/* MAYBE_USED decorates the stack-adjustment handlers below.  It expands
   to nothing when PERF_FAST_STACK is defined, and to
   __attribute__((unused)) otherwise. */
#if defined(PERF_FAST_STACK)
#  define MAYBE_USED
#else
#  define MAYBE_USED __attribute__((unused))
#endif
3009 /*--------------- adjustment by 4 bytes ---------------*/
3011 MAYBE_USED
3012 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
3014 UInt otag = ecu | MC_OKIND_STACK;
3015 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
3016 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3017 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3018 } else {
3019 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
3023 MAYBE_USED
3024 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
3026 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
3027 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3028 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3029 } else {
3030 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
3034 MAYBE_USED
3035 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
3037 PROF_EVENT(MCPE_DIE_MEM_STACK_4);
3038 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3039 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3040 } else {
3041 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
3045 /*--------------- adjustment by 8 bytes ---------------*/
3047 MAYBE_USED
3048 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
3050 UInt otag = ecu | MC_OKIND_STACK;
3051 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
3052 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3053 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3054 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3055 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3056 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
3057 } else {
3058 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
3062 MAYBE_USED
3063 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
3065 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
3066 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3067 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3068 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3069 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3070 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3071 } else {
3072 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
3076 MAYBE_USED
3077 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
3079 PROF_EVENT(MCPE_DIE_MEM_STACK_8);
3080 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3081 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3082 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3083 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3084 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3085 } else {
3086 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
3090 /*--------------- adjustment by 12 bytes ---------------*/
3092 MAYBE_USED
3093 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
3095 UInt otag = ecu | MC_OKIND_STACK;
3096 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
3097 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3098 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3099 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3100 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3101 /* from previous test we don't have 8-alignment at offset +0,
3102 hence must have 8 alignment at offsets +4/-4. Hence safe to
3103 do 4 at +0 and then 8 at +4/. */
3104 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3105 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
3106 } else {
3107 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
3111 MAYBE_USED
3112 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
3114 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
3115 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3116 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3117 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3118 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3119 /* from previous test we don't have 8-alignment at offset +0,
3120 hence must have 8 alignment at offsets +4/-4. Hence safe to
3121 do 4 at +0 and then 8 at +4/. */
3122 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3123 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3124 } else {
3125 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
3129 MAYBE_USED
3130 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
3132 PROF_EVENT(MCPE_DIE_MEM_STACK_12);
3133 /* Note the -12 in the test */
3134 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
3135 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
3136 -4. */
3137 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3138 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3139 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3140 /* We have 4-alignment at +0, but we don't have 8-alignment at
3141 -12. So we must have 8-alignment at -8. Hence do 4 at -12
3142 and then 8 at -8. */
3143 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3144 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3145 } else {
3146 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
3150 /*--------------- adjustment by 16 bytes ---------------*/
3152 MAYBE_USED
3153 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
3155 UInt otag = ecu | MC_OKIND_STACK;
3156 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
3157 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3158 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3159 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3160 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3161 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3162 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3163 Hence do 4 at +0, 8 at +4, 4 at +12. */
3164 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3165 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3166 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3167 } else {
3168 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
3172 MAYBE_USED
3173 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
3175 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
3176 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3177 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3178 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3179 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3180 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3181 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3182 Hence do 4 at +0, 8 at +4, 4 at +12. */
3183 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3184 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3185 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3186 } else {
3187 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
3191 MAYBE_USED
3192 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
3194 PROF_EVENT(MCPE_DIE_MEM_STACK_16);
3195 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3196 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
3197 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3198 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3199 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3200 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
3201 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3202 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3203 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3204 } else {
3205 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
3209 /*--------------- adjustment by 32 bytes ---------------*/
3211 MAYBE_USED
3212 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
3214 UInt otag = ecu | MC_OKIND_STACK;
3215 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3216 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3217 /* Straightforward */
3218 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3219 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3220 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3221 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3222 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3223 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3224 +0,+28. */
3225 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3226 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3227 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3228 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
3229 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
3230 } else {
3231 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
3235 MAYBE_USED
3236 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
3238 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3239 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3240 /* Straightforward */
3241 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3242 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3243 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3244 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3245 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3246 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3247 +0,+28. */
3248 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3249 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3250 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3251 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3252 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3253 } else {
3254 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3258 MAYBE_USED
3259 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3261 PROF_EVENT(MCPE_DIE_MEM_STACK_32);
3262 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3263 /* Straightforward */
3264 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3265 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3266 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3267 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3268 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3269 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
3270 4 at -32,-4. */
3271 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3272 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3273 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3274 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3275 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3276 } else {
3277 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3281 /*--------------- adjustment by 112 bytes ---------------*/
3283 MAYBE_USED
3284 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3286 UInt otag = ecu | MC_OKIND_STACK;
3287 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3288 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3289 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3290 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3291 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3292 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3293 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3294 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3295 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3296 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3297 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3298 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3299 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3300 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3301 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3302 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3303 } else {
3304 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3308 MAYBE_USED
3309 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3311 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3312 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3313 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3314 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3315 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3316 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3317 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3318 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3319 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3320 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3321 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3322 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3323 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3324 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3325 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3326 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3327 } else {
3328 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3332 MAYBE_USED
3333 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3335 PROF_EVENT(MCPE_DIE_MEM_STACK_112);
3336 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3337 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3338 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3339 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3340 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3341 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3342 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3343 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3344 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3345 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3346 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3347 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3348 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3349 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3350 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3351 } else {
3352 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3356 /*--------------- adjustment by 128 bytes ---------------*/
3358 MAYBE_USED
3359 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3361 UInt otag = ecu | MC_OKIND_STACK;
3362 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3363 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3364 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3365 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3366 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3367 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3368 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3369 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3370 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3371 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3372 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3373 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3374 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3375 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3376 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3377 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3378 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3379 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3380 } else {
3381 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3385 MAYBE_USED
3386 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3388 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3389 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3390 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3391 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3392 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3393 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3394 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3395 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3396 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3397 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3398 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3399 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3400 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3401 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3402 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3403 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3404 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3405 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3406 } else {
3407 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3411 MAYBE_USED
3412 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3414 PROF_EVENT(MCPE_DIE_MEM_STACK_128);
3415 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3416 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3417 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3418 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3419 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3420 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3421 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3422 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3423 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3424 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3425 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3426 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3427 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3428 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3429 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3430 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3431 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3432 } else {
3433 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3437 /*--------------- adjustment by 144 bytes ---------------*/
3439 MAYBE_USED
3440 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3442 UInt otag = ecu | MC_OKIND_STACK;
3443 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3444 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3445 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3446 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3447 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3448 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3449 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3450 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3451 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3452 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3453 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3454 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3455 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3456 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3457 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3458 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3459 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3460 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3461 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3462 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3463 } else {
3464 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3468 MAYBE_USED
3469 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3471 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3472 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3473 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3474 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3475 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3476 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3477 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3478 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3479 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3480 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3481 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3482 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3483 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3484 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3485 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3486 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3487 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3488 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3489 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3490 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3491 } else {
3492 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3496 MAYBE_USED
3497 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3499 PROF_EVENT(MCPE_DIE_MEM_STACK_144);
3500 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3501 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3502 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3503 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3504 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3505 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3506 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3507 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3508 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3509 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3510 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3511 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3512 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3513 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3514 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3515 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3516 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3517 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3518 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3519 } else {
3520 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3524 /*--------------- adjustment by 160 bytes ---------------*/
3526 MAYBE_USED
3527 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3529 UInt otag = ecu | MC_OKIND_STACK;
3530 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3531 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3532 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3533 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3534 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3535 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3536 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3537 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3538 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3539 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3540 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3541 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3542 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3543 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3544 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3545 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3546 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3547 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3548 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3549 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3550 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3551 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3552 } else {
3553 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3557 MAYBE_USED
3558 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3560 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3561 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3562 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3563 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3564 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3565 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3566 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3567 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3568 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3569 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3570 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3571 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3572 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3573 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3574 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3575 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3576 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3577 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3578 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3579 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3580 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3581 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3582 } else {
3583 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3587 MAYBE_USED
3588 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3590 PROF_EVENT(MCPE_DIE_MEM_STACK_160);
3591 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3592 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3593 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3594 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3595 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3596 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3597 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3598 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3599 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3600 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3601 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3602 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3603 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3604 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3605 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3606 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3607 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3608 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3609 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3610 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3611 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3612 } else {
3613 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3617 /*--------------- adjustment by N bytes ---------------*/
3619 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3621 UInt otag = ecu | MC_OKIND_STACK;
3622 PROF_EVENT(MCPE_NEW_MEM_STACK);
3623 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3626 static void mc_new_mem_stack ( Addr a, SizeT len )
3628 PROF_EVENT(MCPE_NEW_MEM_STACK);
3629 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3632 static void mc_die_mem_stack ( Addr a, SizeT len )
3634 PROF_EVENT(MCPE_DIE_MEM_STACK);
3635 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3639 /* The AMD64 ABI says:
3641 "The 128-byte area beyond the location pointed to by %rsp is considered
3642 to be reserved and shall not be modified by signal or interrupt
3643 handlers. Therefore, functions may use this area for temporary data
3644 that is not needed across function calls. In particular, leaf functions
3645 may use this area for their entire stack frame, rather than adjusting
3646 the stack pointer in the prologue and epilogue. This area is known as
3647 red zone [sic]."
3649 So after any call or return we need to mark this redzone as containing
3650 undefined values.
3652 Consider this: we're in function f. f calls g. g moves rsp down
3653 modestly (say 16 bytes) and writes stuff all over the red zone, making it
3654 defined. g returns. f is buggy and reads from parts of the red zone
3655 that it didn't write on. But because g filled that area in, f is going
3656 to be picking up defined V bits and so any errors from reading bits of
3657 the red zone it didn't write, will be missed. The only solution I could
3658 think of was to make the red zone undefined when g returns to f.
3660 This is in accordance with the ABI, which makes it clear the redzone
3661 is volatile across function calls.
3663 The problem occurs the other way round too: f could fill the RZ up
3664 with defined values and g could mistakenly read them. So the RZ
3665 also needs to be nuked on function calls.
3669 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3670 improved so as to have a lower miss rate. */
3672 static UWord stats__nia_cache_queries = 0;
3673 static UWord stats__nia_cache_misses = 0;
3675 typedef
3676 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
3677 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3678 WCacheEnt;
3680 #define N_NIA_TO_ECU_CACHE 511
3682 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3684 static void init_nia_to_ecu_cache ( void )
3686 UWord i;
3687 Addr zero_addr = 0;
3688 ExeContext* zero_ec;
3689 UInt zero_ecu;
3690 /* Fill all the slots with an entry for address zero, and the
3691 relevant otags accordingly. Hence the cache is initially filled
3692 with valid data. */
3693 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3694 tl_assert(zero_ec);
3695 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3696 tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3697 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3698 nia_to_ecu_cache[i].nia0 = zero_addr;
3699 nia_to_ecu_cache[i].ecu0 = zero_ecu;
3700 nia_to_ecu_cache[i].nia1 = zero_addr;
3701 nia_to_ecu_cache[i].ecu1 = zero_ecu;
3705 static inline UInt convert_nia_to_ecu ( Addr nia )
3707 UWord i;
3708 UInt ecu;
3709 ExeContext* ec;
3711 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3713 stats__nia_cache_queries++;
3714 i = nia % N_NIA_TO_ECU_CACHE;
3715 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3717 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3718 return nia_to_ecu_cache[i].ecu0;
3720 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3721 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3722 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3723 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3724 # undef SWAP
3725 return nia_to_ecu_cache[i].ecu0;
3728 stats__nia_cache_misses++;
3729 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3730 tl_assert(ec);
3731 ecu = VG_(get_ECU_from_ExeContext)(ec);
3732 tl_assert(VG_(is_plausible_ECU)(ecu));
3734 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3735 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3737 nia_to_ecu_cache[i].nia0 = nia;
3738 nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3739 return ecu;
3743 /* This marks the stack as addressible but undefined, after a call or
3744 return for a target that has an ABI defined stack redzone. It
3745 happens quite a lot and needs to be fast. This is the version for
3746 origin tracking. The non-origin-tracking version is below. */
3747 VG_REGPARM(3)
3748 void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
3750 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
3751 if (0)
3752 VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
3753 base, len, nia );
3755 UInt ecu = convert_nia_to_ecu ( nia );
3756 tl_assert(VG_(is_plausible_ECU)(ecu));
3758 UInt otag = ecu | MC_OKIND_STACK;
3760 # if 0
3761 /* Slow(ish) version, which is fairly easily seen to be correct.
3763 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3764 make_aligned_word64_undefined_w_otag(base + 0, otag);
3765 make_aligned_word64_undefined_w_otag(base + 8, otag);
3766 make_aligned_word64_undefined_w_otag(base + 16, otag);
3767 make_aligned_word64_undefined_w_otag(base + 24, otag);
3769 make_aligned_word64_undefined_w_otag(base + 32, otag);
3770 make_aligned_word64_undefined_w_otag(base + 40, otag);
3771 make_aligned_word64_undefined_w_otag(base + 48, otag);
3772 make_aligned_word64_undefined_w_otag(base + 56, otag);
3774 make_aligned_word64_undefined_w_otag(base + 64, otag);
3775 make_aligned_word64_undefined_w_otag(base + 72, otag);
3776 make_aligned_word64_undefined_w_otag(base + 80, otag);
3777 make_aligned_word64_undefined_w_otag(base + 88, otag);
3779 make_aligned_word64_undefined_w_otag(base + 96, otag);
3780 make_aligned_word64_undefined_w_otag(base + 104, otag);
3781 make_aligned_word64_undefined_w_otag(base + 112, otag);
3782 make_aligned_word64_undefined_w_otag(base + 120, otag);
3783 } else {
3784 MC_(make_mem_undefined_w_otag)(base, len, otag);
3786 # endif
3788 /* Idea is: go fast when
3789 * 8-aligned and length is 128
3790 * the sm is available in the main primary map
3791 * the address range falls entirely with a single secondary map
3792 If all those conditions hold, just update the V+A bits by writing
3793 directly into the vabits array. (If the sm was distinguished, this
3794 will make a copy and then write to it.)
3796 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3797 /* Now we know the address range is suitably sized and aligned. */
3798 UWord a_lo = (UWord)(base);
3799 UWord a_hi = (UWord)(base + 128 - 1);
3800 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3801 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3802 /* Now we know the entire range is within the main primary map. */
3803 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3804 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3805 if (LIKELY(pm_off_lo == pm_off_hi)) {
3806 /* Now we know that the entire address range falls within a
3807 single secondary map, and that that secondary 'lives' in
3808 the main primary map. */
3809 SecMap* sm = get_secmap_for_writing_low(a_lo);
3810 UWord v_off16 = SM_OFF_16(a_lo);
3811 UShort* p = &sm->vabits16[v_off16];
3812 p[ 0] = VA_BITS16_UNDEFINED;
3813 p[ 1] = VA_BITS16_UNDEFINED;
3814 p[ 2] = VA_BITS16_UNDEFINED;
3815 p[ 3] = VA_BITS16_UNDEFINED;
3816 p[ 4] = VA_BITS16_UNDEFINED;
3817 p[ 5] = VA_BITS16_UNDEFINED;
3818 p[ 6] = VA_BITS16_UNDEFINED;
3819 p[ 7] = VA_BITS16_UNDEFINED;
3820 p[ 8] = VA_BITS16_UNDEFINED;
3821 p[ 9] = VA_BITS16_UNDEFINED;
3822 p[10] = VA_BITS16_UNDEFINED;
3823 p[11] = VA_BITS16_UNDEFINED;
3824 p[12] = VA_BITS16_UNDEFINED;
3825 p[13] = VA_BITS16_UNDEFINED;
3826 p[14] = VA_BITS16_UNDEFINED;
3827 p[15] = VA_BITS16_UNDEFINED;
3828 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3829 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3830 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3831 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3832 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3833 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3834 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3835 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3836 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3837 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3838 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3839 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3840 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3841 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3842 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3843 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3844 return;
3849 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3850 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3851 /* Now we know the address range is suitably sized and aligned. */
3852 UWord a_lo = (UWord)(base);
3853 UWord a_hi = (UWord)(base + 288 - 1);
3854 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3855 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3856 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3857 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3858 if (LIKELY(pm_off_lo == pm_off_hi)) {
3859 /* Now we know that the entire address range falls within a
3860 single secondary map, and that that secondary 'lives' in
3861 the main primary map. */
3862 SecMap* sm = get_secmap_for_writing_low(a_lo);
3863 UWord v_off16 = SM_OFF_16(a_lo);
3864 UShort* p = &sm->vabits16[v_off16];
3865 p[ 0] = VA_BITS16_UNDEFINED;
3866 p[ 1] = VA_BITS16_UNDEFINED;
3867 p[ 2] = VA_BITS16_UNDEFINED;
3868 p[ 3] = VA_BITS16_UNDEFINED;
3869 p[ 4] = VA_BITS16_UNDEFINED;
3870 p[ 5] = VA_BITS16_UNDEFINED;
3871 p[ 6] = VA_BITS16_UNDEFINED;
3872 p[ 7] = VA_BITS16_UNDEFINED;
3873 p[ 8] = VA_BITS16_UNDEFINED;
3874 p[ 9] = VA_BITS16_UNDEFINED;
3875 p[10] = VA_BITS16_UNDEFINED;
3876 p[11] = VA_BITS16_UNDEFINED;
3877 p[12] = VA_BITS16_UNDEFINED;
3878 p[13] = VA_BITS16_UNDEFINED;
3879 p[14] = VA_BITS16_UNDEFINED;
3880 p[15] = VA_BITS16_UNDEFINED;
3881 p[16] = VA_BITS16_UNDEFINED;
3882 p[17] = VA_BITS16_UNDEFINED;
3883 p[18] = VA_BITS16_UNDEFINED;
3884 p[19] = VA_BITS16_UNDEFINED;
3885 p[20] = VA_BITS16_UNDEFINED;
3886 p[21] = VA_BITS16_UNDEFINED;
3887 p[22] = VA_BITS16_UNDEFINED;
3888 p[23] = VA_BITS16_UNDEFINED;
3889 p[24] = VA_BITS16_UNDEFINED;
3890 p[25] = VA_BITS16_UNDEFINED;
3891 p[26] = VA_BITS16_UNDEFINED;
3892 p[27] = VA_BITS16_UNDEFINED;
3893 p[28] = VA_BITS16_UNDEFINED;
3894 p[29] = VA_BITS16_UNDEFINED;
3895 p[30] = VA_BITS16_UNDEFINED;
3896 p[31] = VA_BITS16_UNDEFINED;
3897 p[32] = VA_BITS16_UNDEFINED;
3898 p[33] = VA_BITS16_UNDEFINED;
3899 p[34] = VA_BITS16_UNDEFINED;
3900 p[35] = VA_BITS16_UNDEFINED;
3901 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3902 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3903 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3904 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3905 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3906 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3907 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3908 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3909 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3910 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3911 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3912 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3913 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3914 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3915 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3916 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3917 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3918 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3919 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3920 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3921 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3922 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3923 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3924 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3925 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3926 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3927 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3928 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3929 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3930 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3931 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3932 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3933 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3934 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3935 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3936 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3937 return;
3942 /* else fall into slow case */
3943 MC_(make_mem_undefined_w_otag)(base, len, otag);
3947 /* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
3948 specialised for the non-origin-tracking case. */
3949 VG_REGPARM(2)
3950 void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
3952 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
3953 if (0)
3954 VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
3955 base, len );
3957 # if 0
3958 /* Slow(ish) version, which is fairly easily seen to be correct.
3960 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3961 make_aligned_word64_undefined(base + 0);
3962 make_aligned_word64_undefined(base + 8);
3963 make_aligned_word64_undefined(base + 16);
3964 make_aligned_word64_undefined(base + 24);
3966 make_aligned_word64_undefined(base + 32);
3967 make_aligned_word64_undefined(base + 40);
3968 make_aligned_word64_undefined(base + 48);
3969 make_aligned_word64_undefined(base + 56);
3971 make_aligned_word64_undefined(base + 64);
3972 make_aligned_word64_undefined(base + 72);
3973 make_aligned_word64_undefined(base + 80);
3974 make_aligned_word64_undefined(base + 88);
3976 make_aligned_word64_undefined(base + 96);
3977 make_aligned_word64_undefined(base + 104);
3978 make_aligned_word64_undefined(base + 112);
3979 make_aligned_word64_undefined(base + 120);
3980 } else {
3981 make_mem_undefined(base, len);
3983 # endif
3985 /* Idea is: go fast when
3986 * 8-aligned and length is 128
3987 * the sm is available in the main primary map
3988 * the address range falls entirely with a single secondary map
3989 If all those conditions hold, just update the V+A bits by writing
3990 directly into the vabits array. (If the sm was distinguished, this
3991 will make a copy and then write to it.)
3993 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3994 /* Now we know the address range is suitably sized and aligned. */
3995 UWord a_lo = (UWord)(base);
3996 UWord a_hi = (UWord)(base + 128 - 1);
3997 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3998 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3999 /* Now we know the entire range is within the main primary map. */
4000 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4001 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4002 if (LIKELY(pm_off_lo == pm_off_hi)) {
4003 /* Now we know that the entire address range falls within a
4004 single secondary map, and that that secondary 'lives' in
4005 the main primary map. */
4006 SecMap* sm = get_secmap_for_writing_low(a_lo);
4007 UWord v_off16 = SM_OFF_16(a_lo);
4008 UShort* p = &sm->vabits16[v_off16];
4009 p[ 0] = VA_BITS16_UNDEFINED;
4010 p[ 1] = VA_BITS16_UNDEFINED;
4011 p[ 2] = VA_BITS16_UNDEFINED;
4012 p[ 3] = VA_BITS16_UNDEFINED;
4013 p[ 4] = VA_BITS16_UNDEFINED;
4014 p[ 5] = VA_BITS16_UNDEFINED;
4015 p[ 6] = VA_BITS16_UNDEFINED;
4016 p[ 7] = VA_BITS16_UNDEFINED;
4017 p[ 8] = VA_BITS16_UNDEFINED;
4018 p[ 9] = VA_BITS16_UNDEFINED;
4019 p[10] = VA_BITS16_UNDEFINED;
4020 p[11] = VA_BITS16_UNDEFINED;
4021 p[12] = VA_BITS16_UNDEFINED;
4022 p[13] = VA_BITS16_UNDEFINED;
4023 p[14] = VA_BITS16_UNDEFINED;
4024 p[15] = VA_BITS16_UNDEFINED;
4025 return;
4030 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
4031 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
4032 /* Now we know the address range is suitably sized and aligned. */
4033 UWord a_lo = (UWord)(base);
4034 UWord a_hi = (UWord)(base + 288 - 1);
4035 tl_assert(a_lo < a_hi); // paranoia: detect overflow
4036 if (a_hi <= MAX_PRIMARY_ADDRESS) {
4037 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4038 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4039 if (LIKELY(pm_off_lo == pm_off_hi)) {
4040 /* Now we know that the entire address range falls within a
4041 single secondary map, and that that secondary 'lives' in
4042 the main primary map. */
4043 SecMap* sm = get_secmap_for_writing_low(a_lo);
4044 UWord v_off16 = SM_OFF_16(a_lo);
4045 UShort* p = &sm->vabits16[v_off16];
4046 p[ 0] = VA_BITS16_UNDEFINED;
4047 p[ 1] = VA_BITS16_UNDEFINED;
4048 p[ 2] = VA_BITS16_UNDEFINED;
4049 p[ 3] = VA_BITS16_UNDEFINED;
4050 p[ 4] = VA_BITS16_UNDEFINED;
4051 p[ 5] = VA_BITS16_UNDEFINED;
4052 p[ 6] = VA_BITS16_UNDEFINED;
4053 p[ 7] = VA_BITS16_UNDEFINED;
4054 p[ 8] = VA_BITS16_UNDEFINED;
4055 p[ 9] = VA_BITS16_UNDEFINED;
4056 p[10] = VA_BITS16_UNDEFINED;
4057 p[11] = VA_BITS16_UNDEFINED;
4058 p[12] = VA_BITS16_UNDEFINED;
4059 p[13] = VA_BITS16_UNDEFINED;
4060 p[14] = VA_BITS16_UNDEFINED;
4061 p[15] = VA_BITS16_UNDEFINED;
4062 p[16] = VA_BITS16_UNDEFINED;
4063 p[17] = VA_BITS16_UNDEFINED;
4064 p[18] = VA_BITS16_UNDEFINED;
4065 p[19] = VA_BITS16_UNDEFINED;
4066 p[20] = VA_BITS16_UNDEFINED;
4067 p[21] = VA_BITS16_UNDEFINED;
4068 p[22] = VA_BITS16_UNDEFINED;
4069 p[23] = VA_BITS16_UNDEFINED;
4070 p[24] = VA_BITS16_UNDEFINED;
4071 p[25] = VA_BITS16_UNDEFINED;
4072 p[26] = VA_BITS16_UNDEFINED;
4073 p[27] = VA_BITS16_UNDEFINED;
4074 p[28] = VA_BITS16_UNDEFINED;
4075 p[29] = VA_BITS16_UNDEFINED;
4076 p[30] = VA_BITS16_UNDEFINED;
4077 p[31] = VA_BITS16_UNDEFINED;
4078 p[32] = VA_BITS16_UNDEFINED;
4079 p[33] = VA_BITS16_UNDEFINED;
4080 p[34] = VA_BITS16_UNDEFINED;
4081 p[35] = VA_BITS16_UNDEFINED;
4082 return;
4087 /* else fall into slow case */
4088 make_mem_undefined(base, len);
4092 /* And this is an even more specialised case, for the case where there
4093 is no origin tracking, and the length is 128. */
4094 VG_REGPARM(1)
4095 void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
4097 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
4098 if (0)
4099 VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );
4101 # if 0
4102 /* Slow(ish) version, which is fairly easily seen to be correct.
4104 if (LIKELY( VG_IS_8_ALIGNED(base) )) {
4105 make_aligned_word64_undefined(base + 0);
4106 make_aligned_word64_undefined(base + 8);
4107 make_aligned_word64_undefined(base + 16);
4108 make_aligned_word64_undefined(base + 24);
4110 make_aligned_word64_undefined(base + 32);
4111 make_aligned_word64_undefined(base + 40);
4112 make_aligned_word64_undefined(base + 48);
4113 make_aligned_word64_undefined(base + 56);
4115 make_aligned_word64_undefined(base + 64);
4116 make_aligned_word64_undefined(base + 72);
4117 make_aligned_word64_undefined(base + 80);
4118 make_aligned_word64_undefined(base + 88);
4120 make_aligned_word64_undefined(base + 96);
4121 make_aligned_word64_undefined(base + 104);
4122 make_aligned_word64_undefined(base + 112);
4123 make_aligned_word64_undefined(base + 120);
4124 } else {
4125 make_mem_undefined(base, 128);
4127 # endif
4129 /* Idea is: go fast when
4130 * 16-aligned and length is 128
4131 * the sm is available in the main primary map
4132 * the address range falls entirely within a single secondary map
4133 If all those conditions hold, just update the V+A bits by writing
4134 directly into the vabits array. (If the sm was distinguished, this
4135 will make a copy and then write to it.)
4137 Typically this applies to amd64 'ret' instructions, since RSP is
4138 16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
4140 if (LIKELY( VG_IS_16_ALIGNED(base) )) {
4141 /* Now we know the address range is suitably sized and aligned. */
4142 UWord a_lo = (UWord)(base);
4143 UWord a_hi = (UWord)(base + 128 - 1);
4144 /* FIXME: come up with a sane story on the wraparound case
4145 (which of course cannot happen, but still..) */
4146 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
4147 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
4148 /* Now we know the entire range is within the main primary map. */
4149 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4150 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4151 if (LIKELY(pm_off_lo == pm_off_hi)) {
4152 /* Now we know that the entire address range falls within a
4153 single secondary map, and that that secondary 'lives' in
4154 the main primary map. */
4155 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
4156 SecMap* sm = get_secmap_for_writing_low(a_lo);
4157 UWord v_off = SM_OFF(a_lo);
4158 UInt* w32 = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]);
4159 w32[ 0] = VA_BITS32_UNDEFINED;
4160 w32[ 1] = VA_BITS32_UNDEFINED;
4161 w32[ 2] = VA_BITS32_UNDEFINED;
4162 w32[ 3] = VA_BITS32_UNDEFINED;
4163 w32[ 4] = VA_BITS32_UNDEFINED;
4164 w32[ 5] = VA_BITS32_UNDEFINED;
4165 w32[ 6] = VA_BITS32_UNDEFINED;
4166 w32[ 7] = VA_BITS32_UNDEFINED;
4167 return;
4172 /* The same, but for when base is 8 % 16, which is the situation
4173 with RSP for amd64-ELF immediately after call instructions. */
4175 if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
4176 /* Now we know the address range is suitably sized and aligned. */
4177 UWord a_lo = (UWord)(base);
4178 UWord a_hi = (UWord)(base + 128 - 1);
4179 /* FIXME: come up with a sane story on the wraparound case
4180 (which of course cannot happen, but still..) */
4181 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
4182 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
4183 /* Now we know the entire range is within the main primary map. */
4184 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4185 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4186 if (LIKELY(pm_off_lo == pm_off_hi)) {
4187 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
4188 /* Now we know that the entire address range falls within a
4189 single secondary map, and that that secondary 'lives' in
4190 the main primary map. */
4191 SecMap* sm = get_secmap_for_writing_low(a_lo);
4192 UWord v_off16 = SM_OFF_16(a_lo);
4193 UShort* w16 = &sm->vabits16[v_off16];
4194 UInt* w32 = ASSUME_ALIGNED(UInt*, &w16[1]);
4195 /* The following assertion is commented out for obvious
4196 performance reasons, but was verified as valid when
4197 running the entire testsuite and also Firefox. */
4198 /* tl_assert(VG_IS_4_ALIGNED(w32)); */
4199 w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
4200 w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
4201 w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
4202 w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
4203 w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
4204 w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
4205 w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
4206 w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
4207 w16[15] = VA_BITS16_UNDEFINED; // w16[15]
4208 return;
4213 /* else fall into slow case */
4214 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
4215 make_mem_undefined(base, 128);
4219 /*------------------------------------------------------------*/
4220 /*--- Checking memory ---*/
4221 /*------------------------------------------------------------*/
/* Outcome of scanning a range of shadow memory.  The numeric values
   are assigned explicitly; none of them is 0 or 1. */
typedef enum {
   MC_Ok       = 5,   /* nothing to report */
   MC_AddrErr  = 6,   /* an unaddressable byte was found */
   MC_ValueErr = 7    /* an addressable but not fully defined byte was found */
} MC_ReadResult;
4232 /* Check permissions for address range. If inadequate permissions
4233 exist, *bad_addr is set to the offending address, so the caller can
4234 know what it is. */
4236 /* Returns True if [a .. a+len) is not addressible. Otherwise,
4237 returns False, and if bad_addr is non-NULL, sets *bad_addr to
4238 indicate the lowest failing address. Functions below are
4239 similar. */
4240 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
4242 SizeT i;
4243 UWord vabits2;
4245 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
4246 for (i = 0; i < len; i++) {
4247 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
4248 vabits2 = get_vabits2(a);
4249 if (VA_BITS2_NOACCESS != vabits2) {
4250 if (bad_addr != NULL) *bad_addr = a;
4251 return False;
4253 a++;
4255 return True;
4258 static Bool is_mem_addressable ( Addr a, SizeT len,
4259 /*OUT*/Addr* bad_addr )
4261 SizeT i;
4262 UWord vabits2;
4264 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
4265 for (i = 0; i < len; i++) {
4266 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
4267 vabits2 = get_vabits2(a);
4268 if (VA_BITS2_NOACCESS == vabits2) {
4269 if (bad_addr != NULL) *bad_addr = a;
4270 return False;
4272 a++;
4274 return True;
4277 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
4278 /*OUT*/Addr* bad_addr,
4279 /*OUT*/UInt* otag )
4281 SizeT i;
4282 UWord vabits2;
4284 PROF_EVENT(MCPE_IS_MEM_DEFINED);
4285 DEBUG("is_mem_defined\n");
4287 if (otag) *otag = 0;
4288 if (bad_addr) *bad_addr = 0;
4289 for (i = 0; i < len; i++) {
4290 PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
4291 vabits2 = get_vabits2(a);
4292 if (VA_BITS2_DEFINED != vabits2) {
4293 // Error! Nb: Report addressability errors in preference to
4294 // definedness errors. And don't report definedeness errors unless
4295 // --undef-value-errors=yes.
4296 if (bad_addr) {
4297 *bad_addr = a;
4299 if (VA_BITS2_NOACCESS == vabits2) {
4300 return MC_AddrErr;
4302 if (MC_(clo_mc_level) >= 2) {
4303 if (otag && MC_(clo_mc_level) == 3) {
4304 *otag = MC_(helperc_b_load1)( a );
4306 return MC_ValueErr;
4309 a++;
4311 return MC_Ok;
4315 /* Like is_mem_defined but doesn't give up at the first uninitialised
4316 byte -- the entire range is always checked. This is important for
4317 detecting errors in the case where a checked range strays into
4318 invalid memory, but that fact is not detected by the ordinary
4319 is_mem_defined(), because of an undefined section that precedes the
4320 out of range section, possibly as a result of an alignment hole in
4321 the checked data. This version always checks the entire range and
4322 can report both a definedness and an accessbility error, if
4323 necessary. */
4324 static void is_mem_defined_comprehensive (
4325 Addr a, SizeT len,
4326 /*OUT*/Bool* errorV, /* is there a definedness err? */
4327 /*OUT*/Addr* bad_addrV, /* if so where? */
4328 /*OUT*/UInt* otagV, /* and what's its otag? */
4329 /*OUT*/Bool* errorA, /* is there an addressability err? */
4330 /*OUT*/Addr* bad_addrA /* if so where? */
4333 SizeT i;
4334 UWord vabits2;
4335 Bool already_saw_errV = False;
4337 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
4338 DEBUG("is_mem_defined_comprehensive\n");
4340 tl_assert(!(*errorV || *errorA));
4342 for (i = 0; i < len; i++) {
4343 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
4344 vabits2 = get_vabits2(a);
4345 switch (vabits2) {
4346 case VA_BITS2_DEFINED:
4347 a++;
4348 break;
4349 case VA_BITS2_UNDEFINED:
4350 case VA_BITS2_PARTDEFINED:
4351 if (!already_saw_errV) {
4352 *errorV = True;
4353 *bad_addrV = a;
4354 if (MC_(clo_mc_level) == 3) {
4355 *otagV = MC_(helperc_b_load1)( a );
4356 } else {
4357 *otagV = 0;
4359 already_saw_errV = True;
4361 a++; /* keep going */
4362 break;
4363 case VA_BITS2_NOACCESS:
4364 *errorA = True;
4365 *bad_addrA = a;
4366 return; /* give up now. */
4367 default:
4368 tl_assert(0);
4374 /* Check a zero-terminated ascii string. Tricky -- don't want to
4375 examine the actual bytes, to find the end, until we're sure it is
4376 safe to do so. */
4378 static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
4380 UWord vabits2;
4382 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
4383 DEBUG("mc_is_defined_asciiz\n");
4385 if (otag) *otag = 0;
4386 if (bad_addr) *bad_addr = 0;
4387 while (True) {
4388 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
4389 vabits2 = get_vabits2(a);
4390 if (VA_BITS2_DEFINED != vabits2) {
4391 // Error! Nb: Report addressability errors in preference to
4392 // definedness errors. And don't report definedeness errors unless
4393 // --undef-value-errors=yes.
4394 if (bad_addr) {
4395 *bad_addr = a;
4397 if (VA_BITS2_NOACCESS == vabits2) {
4398 return MC_AddrErr;
4400 if (MC_(clo_mc_level) >= 2) {
4401 if (otag && MC_(clo_mc_level) == 3) {
4402 *otag = MC_(helperc_b_load1)( a );
4404 return MC_ValueErr;
4407 /* Ok, a is safe to read. */
4408 if (* ((UChar*)a) == 0) {
4409 return MC_Ok;
4411 a++;
4416 /*------------------------------------------------------------*/
4417 /*--- Memory event handlers ---*/
4418 /*------------------------------------------------------------*/
4420 static
4421 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
4422 Addr base, SizeT size )
4424 Addr bad_addr;
4425 Bool ok = is_mem_addressable ( base, size, &bad_addr );
4427 if (!ok) {
4428 switch (part) {
4429 case Vg_CoreSysCall:
4430 MC_(record_memparam_error) ( tid, bad_addr,
4431 /*isAddrErr*/True, s, 0/*otag*/ );
4432 break;
4434 case Vg_CoreSignal:
4435 MC_(record_core_mem_error)( tid, s );
4436 break;
4438 default:
4439 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
4444 static
4445 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
4446 Addr base, SizeT size )
4448 UInt otag = 0;
4449 Addr bad_addr;
4450 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
4452 if (MC_Ok != res) {
4453 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4455 switch (part) {
4456 case Vg_CoreSysCall:
4457 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4458 isAddrErr ? 0 : otag );
4459 break;
4461 case Vg_CoreSysCallArgInMem:
4462 MC_(record_regparam_error) ( tid, s, otag );
4463 break;
4465 /* If we're being asked to jump to a silly address, record an error
4466 message before potentially crashing the entire system. */
4467 case Vg_CoreTranslate:
4468 MC_(record_jump_error)( tid, bad_addr );
4469 break;
4471 default:
4472 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
4477 static
4478 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
4479 const HChar* s, Addr str )
4481 MC_ReadResult res;
4482 Addr bad_addr = 0; // shut GCC up
4483 UInt otag = 0;
4485 tl_assert(part == Vg_CoreSysCall);
4486 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
4487 if (MC_Ok != res) {
4488 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4489 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4490 isAddrErr ? 0 : otag );
4494 /* Handling of mmap and mprotect is not as simple as it seems.
4496 The underlying semantics are that memory obtained from mmap is
4497 always initialised, but may be inaccessible. And changes to the
4498 protection of memory do not change its contents and hence not its
4499 definedness state. Problem is we can't model
4500 inaccessible-but-with-some-definedness state; once we mark memory
4501 as inaccessible we lose all info about definedness, and so can't
4502 restore that if it is later made accessible again.
4504 One obvious thing to do is this:
4506 mmap/mprotect NONE -> noaccess
4507 mmap/mprotect other -> defined
4509 The problem case here is: taking accessible memory, writing
4510 uninitialised data to it, mprotecting it NONE and later mprotecting
4511 it back to some accessible state causes the undefinedness to be
4512 lost.
4514 A better proposal is:
4516 (1) mmap NONE -> make noaccess
4517 (2) mmap other -> make defined
4519 (3) mprotect NONE -> # no change
4520 (4) mprotect other -> change any "noaccess" to "defined"
4522 (2) is OK because memory newly obtained from mmap really is defined
4523 (zeroed out by the kernel -- doing anything else would
4524 constitute a massive security hole.)
4526 (1) is OK because the only way to make the memory usable is via
4527 (4), in which case we also wind up correctly marking it all as
4528 defined.
4530 (3) is the weak case. We choose not to change memory state.
4531 (presumably the range is in some mixture of "defined" and
4532 "undefined", viz, accessible but with arbitrary V bits). Doing
4533 nothing means we retain the V bits, so that if the memory is
4534 later mprotected "other", the V bits remain unchanged, so there
4535 can be no false negatives. The bad effect is that if there's
4536 an access in the area, then MC cannot warn; but at least we'll
4537 get a SEGV to show, so it's better than nothing.
4539 Consider the sequence (3) followed by (4). Any memory that was
4540 "defined" or "undefined" previously retains its state (as
4541 required). Any memory that was "noaccess" before can only have
4542 been made that way by (1), and so it's OK to change it to
4543 "defined".
4545 See https://bugs.kde.org/show_bug.cgi?id=205541
4546 and https://bugs.kde.org/show_bug.cgi?id=210268 */
4548 static
4549 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4550 ULong di_handle )
4552 if (rr || ww || xx) {
4553 /* (2) mmap/mprotect other -> defined */
4554 MC_(make_mem_defined)(a, len);
4555 } else {
4556 /* (1) mmap/mprotect NONE -> noaccess */
4557 MC_(make_mem_noaccess)(a, len);
4561 static
4562 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4564 if (rr || ww || xx) {
4565 /* (4) mprotect other -> change any "noaccess" to "defined" */
4566 make_mem_defined_if_noaccess(a, len);
4567 } else {
4568 /* (3) mprotect NONE -> # no change */
4569 /* do nothing */
4574 static
4575 void mc_new_mem_startup( Addr a, SizeT len,
4576 Bool rr, Bool ww, Bool xx, ULong di_handle )
4578 // Because code is defined, initialised variables get put in the data
4579 // segment and are defined, and uninitialised variables get put in the
4580 // bss segment and are auto-zeroed (and so defined).
4582 // It's possible that there will be padding between global variables.
4583 // This will also be auto-zeroed, and marked as defined by Memcheck. If
4584 // a program uses it, Memcheck will not complain. This is arguably a
4585 // false negative, but it's a grey area -- the behaviour is defined (the
4586 // padding is zeroed) but it's probably not what the user intended. And
4587 // we can't avoid it.
4589 // Note: we generally ignore RWX permissions, because we can't track them
4590 // without requiring more than one A bit which would slow things down a
4591 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
4592 // So we mark any such pages as "unaddressable".
4593 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4594 a, (ULong)len, rr, ww, xx);
4595 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4598 static
4599 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4601 MC_(make_mem_defined)(a, len);
4605 /*------------------------------------------------------------*/
4606 /*--- Register event handlers ---*/
4607 /*------------------------------------------------------------*/
4609 /* Try and get a nonzero origin for the guest state section of thread
4610 tid characterised by (offset,size). Return 0 if nothing to show
4611 for it. */
4612 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4613 Int offset, SizeT size )
4615 Int sh2off;
4616 UInt area[3];
4617 UInt otag;
4618 sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4619 if (sh2off == -1)
4620 return 0; /* This piece of guest state is not tracked */
4621 tl_assert(sh2off >= 0);
4622 tl_assert(0 == (sh2off % 4));
4623 area[0] = 0x31313131;
4624 area[2] = 0x27272727;
4625 VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4626 tl_assert(area[0] == 0x31313131);
4627 tl_assert(area[2] == 0x27272727);
4628 otag = area[1];
4629 return otag;
4633 /* When some chunk of guest state is written, mark the corresponding
4634 shadow area as valid. This is used to initialise arbitrarily large
4635 chunks of guest state, hence the _SIZE value, which has to be as
4636 big as the biggest guest state.
4638 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4639 PtrdiffT offset, SizeT size)
4641 # define MAX_REG_WRITE_SIZE 2264
4642 UChar area[MAX_REG_WRITE_SIZE];
4643 tl_assert(size <= MAX_REG_WRITE_SIZE);
4644 VG_(memset)(area, V_BITS8_DEFINED, size);
4645 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4646 # undef MAX_REG_WRITE_SIZE
4649 static
4650 void mc_post_reg_write_clientcall ( ThreadId tid,
4651 PtrdiffT offset, SizeT size, Addr f)
4653 mc_post_reg_write(/*dummy*/0, tid, offset, size);
4656 /* Look at the definedness of the guest's shadow state for
4657 [offset, offset+len). If any part of that is undefined, record
4658 a parameter error.
4660 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4661 PtrdiffT offset, SizeT size)
4663 Int i;
4664 Bool bad;
4665 UInt otag;
4667 UChar area[16];
4668 tl_assert(size <= 16);
4670 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4672 bad = False;
4673 for (i = 0; i < size; i++) {
4674 if (area[i] != V_BITS8_DEFINED) {
4675 bad = True;
4676 break;
4680 if (!bad)
4681 return;
4683 /* We've found some undefinedness. See if we can also find an
4684 origin for it. */
4685 otag = mb_get_origin_for_guest_offset( tid, offset, size );
4686 MC_(record_regparam_error) ( tid, s, otag );
4690 /*------------------------------------------------------------*/
4691 /*--- Register-memory event handlers ---*/
4692 /*------------------------------------------------------------*/
4694 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
4695 PtrdiffT guest_state_offset, SizeT size )
4697 SizeT i;
4698 UChar vbits8;
4699 Int offset;
4700 UInt d32;
4702 /* Slow loop. */
4703 for (i = 0; i < size; i++) {
4704 get_vbits8( a+i, &vbits8 );
4705 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
4706 1, &vbits8 );
4709 if (MC_(clo_mc_level) != 3)
4710 return;
4712 /* Track origins. */
4713 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4714 if (offset == -1)
4715 return;
4717 switch (size) {
4718 case 1:
4719 d32 = MC_(helperc_b_load1)( a );
4720 break;
4721 case 2:
4722 d32 = MC_(helperc_b_load2)( a );
4723 break;
4724 case 4:
4725 d32 = MC_(helperc_b_load4)( a );
4726 break;
4727 case 8:
4728 d32 = MC_(helperc_b_load8)( a );
4729 break;
4730 case 16:
4731 d32 = MC_(helperc_b_load16)( a );
4732 break;
4733 case 32:
4734 d32 = MC_(helperc_b_load32)( a );
4735 break;
4736 default:
4737 tl_assert(0);
4740 VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
4743 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
4744 PtrdiffT guest_state_offset, Addr a,
4745 SizeT size )
4747 SizeT i;
4748 UChar vbits8;
4749 Int offset;
4750 UInt d32;
4752 /* Slow loop. */
4753 for (i = 0; i < size; i++) {
4754 VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
4755 guest_state_offset+i, 1 );
4756 set_vbits8( a+i, vbits8 );
4759 if (MC_(clo_mc_level) != 3)
4760 return;
4762 /* Track origins. */
4763 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4764 if (offset == -1)
4765 return;
4767 VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
4768 switch (size) {
4769 case 1:
4770 MC_(helperc_b_store1)( a, d32 );
4771 break;
4772 case 2:
4773 MC_(helperc_b_store2)( a, d32 );
4774 break;
4775 case 4:
4776 MC_(helperc_b_store4)( a, d32 );
4777 break;
4778 case 8:
4779 MC_(helperc_b_store8)( a, d32 );
4780 break;
4781 case 16:
4782 MC_(helperc_b_store16)( a, d32 );
4783 break;
4784 case 32:
4785 MC_(helperc_b_store32)( a, d32 );
4786 break;
4787 default:
4788 tl_assert(0);
4793 /*------------------------------------------------------------*/
4794 /*--- Some static assertions ---*/
4795 /*------------------------------------------------------------*/
4797 /* The handwritten assembly helpers below have baked-in assumptions
4798 about various constant values. These assertions attempt to make
4799 that a bit safer by checking those values and flagging changes that
4800 would make the assembly invalid. Not perfect but it's better than
4801 nothing. */
4803 STATIC_ASSERT(SM_CHUNKS * 4 == 65536);
4805 STATIC_ASSERT(VA_BITS8_DEFINED == 0xAA);
4806 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);
4808 STATIC_ASSERT(V_BITS32_DEFINED == 0x00000000);
4809 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);
4811 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA);
4812 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);
4814 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000);
4815 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);
4817 STATIC_ASSERT(VA_BITS2_DEFINED == 2);
4818 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);
4820 STATIC_ASSERT(V_BITS8_DEFINED == 0x00);
4821 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
4824 /*------------------------------------------------------------*/
4825 /*--- Functions called directly from generated code: ---*/
4826 /*--- Load/store handlers. ---*/
4827 /*------------------------------------------------------------*/
4829 /* Types: LOADV32, LOADV16, LOADV8 are:
4830 UWord fn ( Addr a )
4831 so they return 32-bits on 32-bit machines and 64-bits on
4832 64-bit machines. Addr has the same size as a host word.
4834 LOADV64 is always ULong fn ( Addr a )
4836 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4837 are a UWord, and for STOREV64 they are a ULong. */
/* If any part of '_a' indicated by the mask is 1, either '_a' is not
   naturally '_sz/8'-aligned, or it exceeds the range covered by the
   primary map.  This is all very tricky (and important!), so let's
   work through the maths by hand (below), *and* assert for these
   values at startup.

   MASK(_szInBytes) has a 1 bit in every position that must be zero in
   a suitably aligned address that lies within the primary map. */
#define MASK(_szInBytes) \
   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )

/* MASK only exists so as to define this macro.  Evaluates to nonzero
   iff '_a' is misaligned for an access of '_szInBits' bits or lies
   above the primary map.  Both arguments are fully parenthesised so
   that any expression may be passed for either of them. */
#define UNALIGNED_OR_HIGH(_a,_szInBits) \
   ((_a) & MASK(((_szInBits) >> 3)))
4852 /* On a 32-bit machine:
4854 N_PRIMARY_BITS == 16, so
4855 N_PRIMARY_MAP == 0x10000, so
4856 N_PRIMARY_MAP-1 == 0xFFFF, so
4857 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4859 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4860 = ~ ( 0xFFFF | 0xFFFF0000 )
4861 = ~ 0xFFFF'FFFF = 0
4864 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4865 = ~ ( 0xFFFE | 0xFFFF0000 )
4866 = ~ 0xFFFF'FFFE = 1
4869 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4870 = ~ ( 0xFFFC | 0xFFFF0000 )
4871 = ~ 0xFFFF'FFFC = 3
4874 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4875 = ~ ( 0xFFF8 | 0xFFFF0000 )
4876 = ~ 0xFFFF'FFF8 = 7
4879 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4880 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for
4881 the 1-byte alignment case, it is always a zero value, since MASK(1)
4882 is zero. All as expected.
4884 On a 64-bit machine, it's more complex, since we're testing
4885 simultaneously for misalignment and for the address being at or
4886 above 64G:
4888 N_PRIMARY_BITS == 20, so
4889 N_PRIMARY_MAP == 0x100000, so
4890 N_PRIMARY_MAP-1 == 0xFFFFF, so
4891 (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4893 MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4894 = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4895 = ~ 0xF'FFFF'FFFF
4896 = 0xFFFF'FFF0'0000'0000
4898 MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4899 = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4900 = ~ 0xF'FFFF'FFFE
4901 = 0xFFFF'FFF0'0000'0001
4903 MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4904 = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4905 = ~ 0xF'FFFF'FFFC
4906 = 0xFFFF'FFF0'0000'0003
4908 MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4909 = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4910 = ~ 0xF'FFFF'FFF8
4911 = 0xFFFF'FFF0'0000'0007 */
4914 /*------------------------------------------------------------*/
4915 /*--- LOADV256 and LOADV128 ---*/
4916 /*------------------------------------------------------------*/
4918 static INLINE
4919 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4920 Addr a, SizeT nBits, Bool isBigEndian )
4922 PROF_EVENT(MCPE_LOADV_128_OR_256);
4924 #ifndef PERF_FAST_LOADV
4925 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4926 return;
4927 #else
4929 UWord sm_off16, vabits16, j;
4930 UWord nBytes = nBits / 8;
4931 UWord nULongs = nBytes / 8;
4932 SecMap* sm;
4934 if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4935 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
4936 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4937 return;
4940 /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4941 suitably aligned, is mapped, and addressible. */
4942 for (j = 0; j < nULongs; j++) {
4943 sm = get_secmap_for_reading_low(a + 8*j);
4944 sm_off16 = SM_OFF_16(a + 8*j);
4945 vabits16 = sm->vabits16[sm_off16];
4947 // Convert V bits from compact memory form to expanded
4948 // register form.
4949 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4950 res[j] = V_BITS64_DEFINED;
4951 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4952 res[j] = V_BITS64_UNDEFINED;
4953 } else {
4954 /* Slow case: some block of 8 bytes are not all-defined or
4955 all-undefined. */
4956 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
4957 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4958 return;
4961 return;
4963 #endif
4966 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4968 mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4970 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4972 mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4975 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4977 mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4979 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4981 mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4984 /*------------------------------------------------------------*/
4985 /*--- LOADV64 ---*/
4986 /*------------------------------------------------------------*/
4988 static INLINE
4989 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4991 PROF_EVENT(MCPE_LOADV64);
4993 #ifndef PERF_FAST_LOADV
4994 return mc_LOADVn_slow( a, 64, isBigEndian );
4995 #else
4997 UWord sm_off16, vabits16;
4998 SecMap* sm;
5000 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
5001 PROF_EVENT(MCPE_LOADV64_SLOW1);
5002 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
5005 sm = get_secmap_for_reading_low(a);
5006 sm_off16 = SM_OFF_16(a);
5007 vabits16 = sm->vabits16[sm_off16];
5009 // Handle common case quickly: a is suitably aligned, is mapped, and
5010 // addressible.
5011 // Convert V bits from compact memory form to expanded register form.
5012 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
5013 return V_BITS64_DEFINED;
5014 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
5015 return V_BITS64_UNDEFINED;
5016 } else {
5017 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
5018 PROF_EVENT(MCPE_LOADV64_SLOW2);
5019 return mc_LOADVn_slow( a, 64, isBigEndian );
5022 #endif
5025 // Generic for all platforms
5026 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
5028 return mc_LOADV64(a, True);
5031 // Non-generic assembly for arm32-linux
5032 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5033 && defined(VGP_arm_linux)
5034 /* See mc_main_asm.c */
5036 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5037 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris) || defined(VGP_x86_freebsd))
5038 /* See mc_main_asm.c */
5040 #else
5041 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5042 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
5044 return mc_LOADV64(a, False);
5046 #endif
5048 /*------------------------------------------------------------*/
5049 /*--- STOREV64 ---*/
5050 /*------------------------------------------------------------*/
5052 static INLINE
5053 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
5055 PROF_EVENT(MCPE_STOREV64);
5057 #ifndef PERF_FAST_STOREV
5058 // XXX: this slow case seems to be marginally faster than the fast case!
5059 // Investigate further.
5060 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5061 #else
5063 UWord sm_off16, vabits16;
5064 SecMap* sm;
5066 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
5067 PROF_EVENT(MCPE_STOREV64_SLOW1);
5068 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5069 return;
5072 sm = get_secmap_for_reading_low(a);
5073 sm_off16 = SM_OFF_16(a);
5074 vabits16 = sm->vabits16[sm_off16];
5076 // To understand the below cleverness, see the extensive comments
5077 // in MC_(helperc_STOREV8).
5078 if (LIKELY(V_BITS64_DEFINED == vbits64)) {
5079 if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
5080 return;
5082 if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
5083 sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
5084 return;
5086 PROF_EVENT(MCPE_STOREV64_SLOW2);
5087 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5088 return;
5090 if (V_BITS64_UNDEFINED == vbits64) {
5091 if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
5092 return;
5094 if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
5095 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
5096 return;
5098 PROF_EVENT(MCPE_STOREV64_SLOW3);
5099 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5100 return;
5103 PROF_EVENT(MCPE_STOREV64_SLOW4);
5104 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5106 #endif
5109 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
5111 mc_STOREV64(a, vbits64, True);
5113 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
5115 mc_STOREV64(a, vbits64, False);
5118 /*------------------------------------------------------------*/
5119 /*--- LOADV32 ---*/
5120 /*------------------------------------------------------------*/
5122 static INLINE
5123 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
5125 PROF_EVENT(MCPE_LOADV32);
5127 #ifndef PERF_FAST_LOADV
5128 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5129 #else
5131 UWord sm_off, vabits8;
5132 SecMap* sm;
5134 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5135 PROF_EVENT(MCPE_LOADV32_SLOW1);
5136 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5139 sm = get_secmap_for_reading_low(a);
5140 sm_off = SM_OFF(a);
5141 vabits8 = sm->vabits8[sm_off];
5143 // Handle common case quickly: a is suitably aligned, is mapped, and the
5144 // entire word32 it lives in is addressible.
5145 // Convert V bits from compact memory form to expanded register form.
5146 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
5147 // Almost certainly not necessary, but be paranoid.
5148 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5149 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
5150 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
5151 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
5152 } else {
5153 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
5154 PROF_EVENT(MCPE_LOADV32_SLOW2);
5155 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5158 #endif
5161 // Generic for all platforms
5162 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
5164 return mc_LOADV32(a, True);
5167 // Non-generic assembly for arm32-linux
5168 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5169 && defined(VGP_arm_linux)
5170 /* See mc_main_asm.c */
5172 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5173 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5174 /* See mc_main_asm.c */
5176 #else
5177 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5178 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
5180 return mc_LOADV32(a, False);
5182 #endif
5184 /*------------------------------------------------------------*/
5185 /*--- STOREV32 ---*/
5186 /*------------------------------------------------------------*/
5188 static INLINE
5189 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
5191 PROF_EVENT(MCPE_STOREV32);
5193 #ifndef PERF_FAST_STOREV
5194 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5195 #else
5197 UWord sm_off, vabits8;
5198 SecMap* sm;
5200 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5201 PROF_EVENT(MCPE_STOREV32_SLOW1);
5202 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5203 return;
5206 sm = get_secmap_for_reading_low(a);
5207 sm_off = SM_OFF(a);
5208 vabits8 = sm->vabits8[sm_off];
5210 // To understand the below cleverness, see the extensive comments
5211 // in MC_(helperc_STOREV8).
5212 if (LIKELY(V_BITS32_DEFINED == vbits32)) {
5213 if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
5214 return;
5216 if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
5217 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
5218 return;
5220 PROF_EVENT(MCPE_STOREV32_SLOW2);
5221 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5222 return;
5224 if (V_BITS32_UNDEFINED == vbits32) {
5225 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
5226 return;
5228 if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
5229 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
5230 return;
5232 PROF_EVENT(MCPE_STOREV32_SLOW3);
5233 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5234 return;
5237 PROF_EVENT(MCPE_STOREV32_SLOW4);
5238 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5240 #endif
5243 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
5245 mc_STOREV32(a, vbits32, True);
5247 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
5249 mc_STOREV32(a, vbits32, False);
5252 /*------------------------------------------------------------*/
5253 /*--- LOADV16 ---*/
5254 /*------------------------------------------------------------*/
5256 static INLINE
5257 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
5259 PROF_EVENT(MCPE_LOADV16);
5261 #ifndef PERF_FAST_LOADV
5262 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5263 #else
5265 UWord sm_off, vabits8;
5266 SecMap* sm;
5268 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5269 PROF_EVENT(MCPE_LOADV16_SLOW1);
5270 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5273 sm = get_secmap_for_reading_low(a);
5274 sm_off = SM_OFF(a);
5275 vabits8 = sm->vabits8[sm_off];
5276 // Handle common case quickly: a is suitably aligned, is mapped, and is
5277 // addressible.
5278 // Convert V bits from compact memory form to expanded register form
5279 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; }
5280 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
5281 else {
5282 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5283 // the two sub-bytes.
5284 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
5285 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
5286 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
5287 else {
5288 /* Slow case: the two bytes are not all-defined or all-undefined. */
5289 PROF_EVENT(MCPE_LOADV16_SLOW2);
5290 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5294 #endif
5297 // Generic for all platforms
5298 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
5300 return mc_LOADV16(a, True);
5303 // Non-generic assembly for arm32-linux
5304 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5305 && defined(VGP_arm_linux)
5306 __asm__( /* Derived from NCode template */
5307 ".text \n"
5308 ".align 2 \n"
5309 ".global vgMemCheck_helperc_LOADV16le \n"
5310 ".type vgMemCheck_helperc_LOADV16le, %function \n"
5311 "vgMemCheck_helperc_LOADV16le: \n" //
5312 " tst r0, #1 \n" //
5313 " bne .LLV16LEc12 \n" // if misaligned
5314 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5315 " movw r3, #:lower16:primary_map \n" //
5316 " uxth r1, r0 \n" // r1 = sec-map-offB
5317 " movt r3, #:upper16:primary_map \n" //
5318 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5319 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5320 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5321 " bne .LLV16LEc0 \n" // no, goto .LLV16LEc0
5322 ".LLV16LEh9: \n" //
5323 " mov r0, #0xFFFFFFFF \n" //
5324 " lsl r0, r0, #16 \n" // V_BITS16_DEFINED | top16safe
5325 " bx lr \n" //
5326 ".LLV16LEc0: \n" //
5327 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5328 " bne .LLV16LEc4 \n" //
5329 ".LLV16LEc2: \n" //
5330 " mov r0, #0xFFFFFFFF \n" // V_BITS16_UNDEFINED | top16safe
5331 " bx lr \n" //
5332 ".LLV16LEc4: \n" //
5333 // r1 holds sec-map-VABITS8. r0 holds the address and is 2-aligned.
5334 // Extract the relevant 4 bits and inspect.
5335 " and r2, r0, #2 \n" // addr & 2
5336 " add r2, r2, r2 \n" // 2 * (addr & 2)
5337 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 2))
5338 " and r1, r1, #15 \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
5340 " cmp r1, #0xA \n" // VA_BITS4_DEFINED
5341 " beq .LLV16LEh9 \n" //
5343 " cmp r1, #0x5 \n" // VA_BITS4_UNDEFINED
5344 " beq .LLV16LEc2 \n" //
5346 ".LLV16LEc12: \n" //
5347 " push {r4, lr} \n" //
5348 " mov r2, #0 \n" //
5349 " mov r1, #16 \n" //
5350 " bl mc_LOADVn_slow \n" //
5351 " pop {r4, pc} \n" //
5352 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5353 ".previous\n"
5356 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5357 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5358 __asm__(
5359 ".text\n"
5360 ".align 16\n"
5361 ".global vgMemCheck_helperc_LOADV16le\n"
5362 ".type vgMemCheck_helperc_LOADV16le, @function\n"
5363 "vgMemCheck_helperc_LOADV16le:\n"
5364 " test $0x1, %eax\n"
5365 " jne .LLV16LE5\n" /* jump if not aligned */
5366 " mov %eax, %edx\n"
5367 " shr $0x10, %edx\n"
5368 " mov primary_map(,%edx,4), %ecx\n"
5369 " movzwl %ax, %edx\n"
5370 " shr $0x2, %edx\n"
5371 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5372 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5373 " jne .LLV16LE2\n" /* jump if not all 32bits defined */
5374 ".LLV16LE1:\n"
5375 " mov $0xffff0000,%eax\n" /* V_BITS16_DEFINED | top16safe */
5376 " ret\n"
5377 ".LLV16LE2:\n"
5378 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5379 " jne .LLV16LE4\n" /* jump if not all 32bits undefined */
5380 ".LLV16LE3:\n"
5381 " or $0xffffffff,%eax\n" /* V_BITS16_UNDEFINED | top16safe */
5382 " ret\n"
5383 ".LLV16LE4:\n"
5384 " mov %eax, %ecx\n"
5385 " and $0x2, %ecx\n"
5386 " add %ecx, %ecx\n"
5387 " sar %cl, %edx\n"
5388 " and $0xf, %edx\n"
5389 " cmp $0xa, %edx\n"
5390 " je .LLV16LE1\n" /* jump if all 16bits are defined */
5391 " cmp $0x5, %edx\n"
5392 " je .LLV16LE3\n" /* jump if all 16bits are undefined */
5393 ".LLV16LE5:\n"
5394 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 16, 0) */
5395 " mov $16, %edx\n"
5396 " jmp mc_LOADVn_slow\n"
5397 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5398 ".previous\n"
5401 #else
5402 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5403 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
5405 return mc_LOADV16(a, False);
5407 #endif
5409 /*------------------------------------------------------------*/
5410 /*--- STOREV16 ---*/
5411 /*------------------------------------------------------------*/
5413 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
5414 static INLINE
5415 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
5417 UInt shift;
5418 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
5419 shift = (a & 2) << 1; // shift by 0 or 4
5420 vabits8 >>= shift; // shift the four bits to the bottom
5421 // check 2 x vabits2 != VA_BITS2_NOACCESS
5422 return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
5423 && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
5426 static INLINE
5427 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
5429 PROF_EVENT(MCPE_STOREV16);
5431 #ifndef PERF_FAST_STOREV
5432 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5433 #else
5435 UWord sm_off, vabits8;
5436 SecMap* sm;
5438 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5439 PROF_EVENT(MCPE_STOREV16_SLOW1);
5440 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5441 return;
5444 sm = get_secmap_for_reading_low(a);
5445 sm_off = SM_OFF(a);
5446 vabits8 = sm->vabits8[sm_off];
5448 // To understand the below cleverness, see the extensive comments
5449 // in MC_(helperc_STOREV8).
5450 if (LIKELY(V_BITS16_DEFINED == vbits16)) {
5451 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5452 return;
5454 if (!is_distinguished_sm(sm)
5455 && accessible_vabits4_in_vabits8(a, vabits8)) {
5456 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
5457 &(sm->vabits8[sm_off]) );
5458 return;
5460 PROF_EVENT(MCPE_STOREV16_SLOW2);
5461 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5463 if (V_BITS16_UNDEFINED == vbits16) {
5464 if (vabits8 == VA_BITS8_UNDEFINED) {
5465 return;
5467 if (!is_distinguished_sm(sm)
5468 && accessible_vabits4_in_vabits8(a, vabits8)) {
5469 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
5470 &(sm->vabits8[sm_off]) );
5471 return;
5473 PROF_EVENT(MCPE_STOREV16_SLOW3);
5474 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5475 return;
5478 PROF_EVENT(MCPE_STOREV16_SLOW4);
5479 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5481 #endif
5485 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
5487 mc_STOREV16(a, vbits16, True);
5489 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
5491 mc_STOREV16(a, vbits16, False);
5494 /*------------------------------------------------------------*/
5495 /*--- LOADV8 ---*/
5496 /*------------------------------------------------------------*/
5498 /* Note: endianness is irrelevant for size == 1 */
5500 // Non-generic assembly for arm32-linux
5501 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5502 && defined(VGP_arm_linux)
5503 __asm__( /* Derived from NCode template */
5504 ".text \n"
5505 ".align 2 \n"
5506 ".global vgMemCheck_helperc_LOADV8 \n"
5507 ".type vgMemCheck_helperc_LOADV8, %function \n"
5508 "vgMemCheck_helperc_LOADV8: \n" //
5509 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5510 " movw r3, #:lower16:primary_map \n" //
5511 " uxth r1, r0 \n" // r1 = sec-map-offB
5512 " movt r3, #:upper16:primary_map \n" //
5513 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5514 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5515 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5516 " bne .LLV8c0 \n" // no, goto .LLV8c0
5517 ".LLV8h9: \n" //
5518 " mov r0, #0xFFFFFF00 \n" // V_BITS8_DEFINED | top24safe
5519 " bx lr \n" //
5520 ".LLV8c0: \n" //
5521 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5522 " bne .LLV8c4 \n" //
5523 ".LLV8c2: \n" //
5524 " mov r0, #0xFFFFFFFF \n" // V_BITS8_UNDEFINED | top24safe
5525 " bx lr \n" //
5526 ".LLV8c4: \n" //
5527 // r1 holds sec-map-VABITS8
5528 // r0 holds the address. Extract the relevant 2 bits and inspect.
5529 " and r2, r0, #3 \n" // addr & 3
5530 " add r2, r2, r2 \n" // 2 * (addr & 3)
5531 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5532 " and r1, r1, #3 \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5534 " cmp r1, #2 \n" // VA_BITS2_DEFINED
5535 " beq .LLV8h9 \n" //
5537 " cmp r1, #1 \n" // VA_BITS2_UNDEFINED
5538 " beq .LLV8c2 \n" //
5540 " push {r4, lr} \n" //
5541 " mov r2, #0 \n" //
5542 " mov r1, #8 \n" //
5543 " bl mc_LOADVn_slow \n" //
5544 " pop {r4, pc} \n" //
5545 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5546 ".previous\n"
5549 /* Non-generic assembly for x86-linux */
5550 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5551 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5552 __asm__(
5553 ".text\n"
5554 ".align 16\n"
5555 ".global vgMemCheck_helperc_LOADV8\n"
5556 ".type vgMemCheck_helperc_LOADV8, @function\n"
5557 "vgMemCheck_helperc_LOADV8:\n"
5558 " mov %eax, %edx\n"
5559 " shr $0x10, %edx\n"
5560 " mov primary_map(,%edx,4), %ecx\n"
5561 " movzwl %ax, %edx\n"
5562 " shr $0x2, %edx\n"
5563 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5564 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED? */
5565 " jne .LLV8LE2\n" /* jump if not defined */
5566 ".LLV8LE1:\n"
5567 " mov $0xffffff00, %eax\n" /* V_BITS8_DEFINED | top24safe */
5568 " ret\n"
5569 ".LLV8LE2:\n"
5570 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5571 " jne .LLV8LE4\n" /* jump if not all 32bits are undefined */
5572 ".LLV8LE3:\n"
5573 " or $0xffffffff, %eax\n" /* V_BITS8_UNDEFINED | top24safe */
5574 " ret\n"
5575 ".LLV8LE4:\n"
5576 " mov %eax, %ecx\n"
5577 " and $0x3, %ecx\n"
5578 " add %ecx, %ecx\n"
5579 " sar %cl, %edx\n"
5580 " and $0x3, %edx\n"
5581 " cmp $0x2, %edx\n"
5582 " je .LLV8LE1\n" /* jump if all 8bits are defined */
5583 " cmp $0x1, %edx\n"
5584 " je .LLV8LE3\n" /* jump if all 8bits are undefined */
5585 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 8, 0) */
5586 " mov $0x8, %edx\n"
5587 " jmp mc_LOADVn_slow\n"
5588 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5589 ".previous\n"
5592 #else
5593 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5594 VG_REGPARM(1)
5595 UWord MC_(helperc_LOADV8) ( Addr a )
5597 PROF_EVENT(MCPE_LOADV8);
5599 #ifndef PERF_FAST_LOADV
5600 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5601 #else
5603 UWord sm_off, vabits8;
5604 SecMap* sm;
5606 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5607 PROF_EVENT(MCPE_LOADV8_SLOW1);
5608 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5611 sm = get_secmap_for_reading_low(a);
5612 sm_off = SM_OFF(a);
5613 vabits8 = sm->vabits8[sm_off];
5614 // Convert V bits from compact memory form to expanded register form
5615 // Handle common case quickly: a is mapped, and the entire
5616 // word32 it lives in is addressible.
5617 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; }
5618 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
5619 else {
5620 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5621 // the single byte.
5622 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
5623 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; }
5624 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
5625 else {
5626 /* Slow case: the byte is not all-defined or all-undefined. */
5627 PROF_EVENT(MCPE_LOADV8_SLOW2);
5628 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5632 #endif
5634 #endif
5636 /*------------------------------------------------------------*/
5637 /*--- STOREV8 ---*/
5638 /*------------------------------------------------------------*/
5640 VG_REGPARM(2)
5641 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
5643 PROF_EVENT(MCPE_STOREV8);
5645 #ifndef PERF_FAST_STOREV
5646 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5647 #else
5649 UWord sm_off, vabits8;
5650 SecMap* sm;
5652 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5653 PROF_EVENT(MCPE_STOREV8_SLOW1);
5654 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5655 return;
5658 sm = get_secmap_for_reading_low(a);
5659 sm_off = SM_OFF(a);
5660 vabits8 = sm->vabits8[sm_off];
5662 // Clevernesses to speed up storing V bits.
5663 // The 64/32/16 bit cases also have similar clevernesses, but it
5664 // works a little differently to the code below.
5666 // Cleverness 1: sometimes we don't have to write the shadow memory at
5667 // all, if we can tell that what we want to write is the same as what is
5668 // already there. These cases are marked below as "defined on defined" and
5669 // "undefined on undefined".
5671 // Cleverness 2:
5672 // We also avoid to call mc_STOREVn_slow if the V bits can directly
5673 // be written in the secondary map. V bits can be directly written
5674 // if 4 conditions are respected:
5675 // * The address for which V bits are written is naturally aligned
5676 // on 1 byte for STOREV8 (this is always true)
5677 // on 2 bytes for STOREV16
5678 // on 4 bytes for STOREV32
5679 // on 8 bytes for STOREV64.
5680 // * V bits being written are either fully defined or fully undefined.
5681 // (for partially defined V bits, V bits cannot be directly written,
5682 // as the secondary vbits table must be maintained).
5683 // * the secmap is not distinguished (distinguished maps cannot be
5684 // modified).
5685 // * the memory corresponding to the V bits being written is
5686 // accessible (if one or more bytes are not accessible,
5687 // we must call mc_STOREVn_slow in order to report accessibility
5688 // errors).
5689 // Note that for STOREV32 and STOREV64, it is too expensive
5690 // to verify the accessibility of each byte for the benefit it
5691 // brings. Instead, a quicker check is done by comparing to
5692 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
5693 // but misses some opportunity of direct modifications.
5694 // Checking each byte accessibility was measured for
5695 // STOREV32+perf tests and was slowing down all perf tests.
5696 // The cases corresponding to cleverness 2 are marked below as
5697 // "direct mod".
5698 if (LIKELY(V_BITS8_DEFINED == vbits8)) {
5699 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5700 return; // defined on defined
5702 if (!is_distinguished_sm(sm)
5703 && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
5704 // direct mod
5705 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
5706 &(sm->vabits8[sm_off]) );
5707 return;
5709 PROF_EVENT(MCPE_STOREV8_SLOW2);
5710 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5711 return;
5713 if (V_BITS8_UNDEFINED == vbits8) {
5714 if (vabits8 == VA_BITS8_UNDEFINED) {
5715 return; // undefined on undefined
5717 if (!is_distinguished_sm(sm)
5718 && (VA_BITS2_NOACCESS
5719 != extract_vabits2_from_vabits8(a, vabits8))) {
5720 // direct mod
5721 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
5722 &(sm->vabits8[sm_off]) );
5723 return;
5725 PROF_EVENT(MCPE_STOREV8_SLOW3);
5726 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5727 return;
5730 // Partially defined word
5731 PROF_EVENT(MCPE_STOREV8_SLOW4);
5732 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5734 #endif
5738 /*------------------------------------------------------------*/
5739 /*--- Functions called directly from generated code: ---*/
5740 /*--- Value-check failure handlers. ---*/
5741 /*------------------------------------------------------------*/
5743 /* Call these ones when an origin is available ... */
5744 VG_REGPARM(1)
5745 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
5746 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
5749 VG_REGPARM(1)
5750 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
5751 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
5754 VG_REGPARM(1)
5755 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
5756 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
5759 VG_REGPARM(1)
5760 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
5761 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
5764 VG_REGPARM(2)
5765 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
5766 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
5769 /* ... and these when an origin isn't available. */
5771 VG_REGPARM(0)
5772 void MC_(helperc_value_check0_fail_no_o) ( void ) {
5773 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
5776 VG_REGPARM(0)
5777 void MC_(helperc_value_check1_fail_no_o) ( void ) {
5778 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
5781 VG_REGPARM(0)
5782 void MC_(helperc_value_check4_fail_no_o) ( void ) {
5783 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
5786 VG_REGPARM(0)
5787 void MC_(helperc_value_check8_fail_no_o) ( void ) {
5788 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
5791 VG_REGPARM(1)
5792 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
5793 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
5797 /*------------------------------------------------------------*/
5798 /*--- Metadata get/set functions, for client requests. ---*/
5799 /*------------------------------------------------------------*/
5801 // Nb: this expands the V+A bits out into register-form V bits, even though
5802 // they're in memory. This is for backward compatibility, and because it's
5803 // probably what the user wants.
5805 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5806 error [no longer used], 3 == addressing error. */
5807 /* Nb: We used to issue various definedness/addressability errors from here,
5808 but we took them out because they ranged from not-very-helpful to
5809 downright annoying, and they complicated the error data structures. */
5810 static Int mc_get_or_set_vbits_for_client (
5811 Addr a,
5812 Addr vbits,
5813 SizeT szB,
5814 Bool setting, /* True <=> set vbits, False <=> get vbits */
5815 Bool is_client_request /* True <=> real user request
5816 False <=> internal call from gdbserver */
5819 SizeT i;
5820 Bool ok;
5821 UChar vbits8;
5823 /* Check that arrays are addressible before doing any getting/setting.
5824 vbits to be checked only for real user request. */
5825 for (i = 0; i < szB; i++) {
5826 if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
5827 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
5828 return 3;
5832 /* Do the copy */
5833 if (setting) {
5834 /* setting */
5835 for (i = 0; i < szB; i++) {
5836 ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
5837 tl_assert(ok);
5839 } else {
5840 /* getting */
5841 for (i = 0; i < szB; i++) {
5842 ok = get_vbits8(a + i, &vbits8);
5843 tl_assert(ok);
5844 ((UChar*)vbits)[i] = vbits8;
5846 if (is_client_request)
5847 // The bytes in vbits[] have now been set, so mark them as such.
5848 MC_(make_mem_defined)(vbits, szB);
5851 return 1;
5855 /*------------------------------------------------------------*/
5856 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
5857 /*------------------------------------------------------------*/
5859 /* For the memory leak detector, say whether an entire 64k chunk of
5860 address space is possibly in use, or not. If in doubt return
5861 True.
5863 Bool MC_(is_within_valid_secondary) ( Addr a )
5865 SecMap* sm = maybe_get_secmap_for ( a );
5866 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
5867 /* Definitely not in use. */
5868 return False;
5869 } else {
5870 return True;
5875 /* For the memory leak detector, say whether or not a given word
5876 address is to be regarded as valid. */
5877 Bool MC_(is_valid_aligned_word) ( Addr a )
5879 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5880 tl_assert(VG_IS_WORD_ALIGNED(a));
5881 if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5882 return False;
5883 if (sizeof(UWord) == 8) {
5884 if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5885 return False;
5887 if (UNLIKELY(MC_(in_ignored_range)(a)))
5888 return False;
5889 else
5890 return True;
5894 /*------------------------------------------------------------*/
5895 /*--- Initialisation ---*/
5896 /*------------------------------------------------------------*/
5898 static void init_shadow_memory ( void )
5900 Int i;
5901 SecMap* sm;
5903 tl_assert(V_BIT_UNDEFINED == 1);
5904 tl_assert(V_BIT_DEFINED == 0);
5905 tl_assert(V_BITS8_UNDEFINED == 0xFF);
5906 tl_assert(V_BITS8_DEFINED == 0);
5908 /* Build the 3 distinguished secondaries */
5909 sm = &sm_distinguished[SM_DIST_NOACCESS];
5910 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5912 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5913 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5915 sm = &sm_distinguished[SM_DIST_DEFINED];
5916 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5918 /* Set up the primary map. */
5919 /* These entries gradually get overwritten as the used address
5920 space expands. */
5921 for (i = 0; i < N_PRIMARY_MAP; i++)
5922 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5924 /* Auxiliary primary maps */
5925 init_auxmap_L1_L2();
5927 /* auxmap_size = auxmap_used = 0;
5928 no ... these are statically initialised */
5930 /* Secondary V bit table */
5931 secVBitTable = createSecVBitTable();
5935 /*------------------------------------------------------------*/
5936 /*--- Sanity check machinery (permanently engaged) ---*/
5937 /*------------------------------------------------------------*/
5939 static Bool mc_cheap_sanity_check ( void )
5941 n_sanity_cheap++;
5942 PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
5943 /* Check for sane operating level */
5944 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5945 return False;
5946 /* nothing else useful we can rapidly check */
5947 return True;
5950 static Bool mc_expensive_sanity_check ( void )
5952 Int i;
5953 Word n_secmaps_found;
5954 SecMap* sm;
5955 const HChar* errmsg;
5956 Bool bad = False;
5958 if (0) VG_(printf)("expensive sanity check\n");
5959 if (0) return True;
5961 n_sanity_expensive++;
5962 PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);
5964 /* Check for sane operating level */
5965 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5966 return False;
5968 /* Check that the 3 distinguished SMs are still as they should be. */
5970 /* Check noaccess DSM. */
5971 sm = &sm_distinguished[SM_DIST_NOACCESS];
5972 for (i = 0; i < SM_CHUNKS; i++)
5973 if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5974 bad = True;
5976 /* Check undefined DSM. */
5977 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5978 for (i = 0; i < SM_CHUNKS; i++)
5979 if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5980 bad = True;
5982 /* Check defined DSM. */
5983 sm = &sm_distinguished[SM_DIST_DEFINED];
5984 for (i = 0; i < SM_CHUNKS; i++)
5985 if (sm->vabits8[i] != VA_BITS8_DEFINED)
5986 bad = True;
5988 if (bad) {
5989 VG_(printf)("memcheck expensive sanity: "
5990 "distinguished_secondaries have changed\n");
5991 return False;
5994 /* If we're not checking for undefined value errors, the secondary V bit
5995 * table should be empty. */
5996 if (MC_(clo_mc_level) == 1) {
5997 if (0 != VG_(OSetGen_Size)(secVBitTable))
5998 return False;
6001 /* check the auxiliary maps, very thoroughly */
6002 n_secmaps_found = 0;
6003 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
6004 if (errmsg) {
6005 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
6006 return False;
6009 /* n_secmaps_found is now the number referred to by the auxiliary
6010 primary map. Now add on the ones referred to by the main
6011 primary map. */
6012 for (i = 0; i < N_PRIMARY_MAP; i++) {
6013 if (primary_map[i] == NULL) {
6014 bad = True;
6015 } else {
6016 if (!is_distinguished_sm(primary_map[i]))
6017 n_secmaps_found++;
6021 /* check that the number of secmaps issued matches the number that
6022 are reachable (iow, no secmap leaks) */
6023 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
6024 bad = True;
6026 if (bad) {
6027 VG_(printf)("memcheck expensive sanity: "
6028 "apparent secmap leakage\n");
6029 return False;
6032 if (bad) {
6033 VG_(printf)("memcheck expensive sanity: "
6034 "auxmap covers wrong address space\n");
6035 return False;
6038 /* there is only one pointer to each secmap (expensive) */
6040 return True;
6043 /*------------------------------------------------------------*/
6044 /*--- Command line args ---*/
6045 /*------------------------------------------------------------*/
6047 /* 31 Aug 2015: Vectorised code is now so widespread that
6048 --partial-loads-ok needs to be enabled by default on all platforms.
6049 Not doing so causes lots of false errors. */
6050 Bool MC_(clo_partial_loads_ok) = True;
6051 Long MC_(clo_freelist_vol) = 20*1000*1000LL;
6052 Long MC_(clo_freelist_big_blocks) = 1*1000*1000LL;
6053 LeakCheckMode MC_(clo_leak_check) = LC_Summary;
6054 VgRes MC_(clo_leak_resolution) = Vg_HighRes;
6055 UInt MC_(clo_show_leak_kinds) = R2S(Possible) | R2S(Unreached);
6056 UInt MC_(clo_error_for_leak_kinds) = R2S(Possible) | R2S(Unreached);
6057 UInt MC_(clo_leak_check_heuristics) = H2S(LchStdString)
6058 | H2S( LchLength64)
6059 | H2S( LchNewArray)
6060 | H2S( LchMultipleInheritance);
6061 Bool MC_(clo_xtree_leak) = False;
6062 const HChar* MC_(clo_xtree_leak_file) = "xtleak.kcg.%p";
6063 Bool MC_(clo_workaround_gcc296_bugs) = False;
6064 Int MC_(clo_malloc_fill) = -1;
6065 Int MC_(clo_free_fill) = -1;
6066 KeepStacktraces MC_(clo_keep_stacktraces) = KS_alloc_and_free;
6067 Int MC_(clo_mc_level) = 2;
6068 Bool MC_(clo_show_mismatched_frees) = True;
6069 Bool MC_(clo_show_realloc_size_zero) = True;
6071 ExpensiveDefinednessChecks
6072 MC_(clo_expensive_definedness_checks) = EdcAUTO;
6074 Bool MC_(clo_ignore_range_below_sp) = False;
6075 UInt MC_(clo_ignore_range_below_sp__first_offset) = 0;
6076 UInt MC_(clo_ignore_range_below_sp__last_offset) = 0;
6078 static const HChar * MC_(parse_leak_heuristics_tokens) =
6079 "-,stdstring,length64,newarray,multipleinheritance";
6080 /* The first heuristic value (LchNone) has no keyword, as this is
6081 a fake heuristic used to collect the blocks found without any
6082 heuristic. */
6084 static Bool mc_process_cmd_line_options(const HChar* arg)
6086 const HChar* tmp_str;
6087 Bool tmp_show;
6089 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6091 /* Set MC_(clo_mc_level):
6092 1 = A bit tracking only
6093 2 = A and V bit tracking, but no V bit origins
6094 3 = A and V bit tracking, and V bit origins
6096 Do this by inspecting --undef-value-errors= and
6097 --track-origins=. Reject the case --undef-value-errors=no
6098 --track-origins=yes as meaningless.
6100 if VG_BOOL_CLO(arg, "--undef-value-errors", tmp_show) {
6101 if (tmp_show) {
6102 if (MC_(clo_mc_level) == 1)
6103 MC_(clo_mc_level) = 2;
6104 } else {
6105 if (MC_(clo_mc_level) == 3) {
6106 goto bad_level;
6107 } else {
6108 MC_(clo_mc_level) = 1;
6112 else if VG_BOOL_CLO(arg, "--track-origins", tmp_show) {
6113 if (tmp_show) {
6114 if (MC_(clo_mc_level) == 1) {
6115 goto bad_level;
6116 } else {
6117 MC_(clo_mc_level) = 3;
6119 } else {
6120 if (MC_(clo_mc_level) == 3)
6121 MC_(clo_mc_level) = 2;
6124 else if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
6125 else if VG_USET_CLOM(cloPD, arg, "--errors-for-leak-kinds",
6126 MC_(parse_leak_kinds_tokens),
6127 MC_(clo_error_for_leak_kinds)) {}
6128 else if VG_USET_CLOM(cloPD, arg, "--show-leak-kinds",
6129 MC_(parse_leak_kinds_tokens),
6130 MC_(clo_show_leak_kinds)) {}
6131 else if VG_USET_CLOM(cloPD, arg, "--leak-check-heuristics",
6132 MC_(parse_leak_heuristics_tokens),
6133 MC_(clo_leak_check_heuristics)) {}
6134 else if (VG_BOOL_CLOM(cloPD, arg, "--show-reachable", tmp_show)) {
6135 if (tmp_show) {
6136 MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
6137 } else {
6138 MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
6141 else if VG_BOOL_CLOM(cloPD, arg, "--show-possibly-lost", tmp_show) {
6142 if (tmp_show) {
6143 MC_(clo_show_leak_kinds) |= R2S(Possible);
6144 } else {
6145 MC_(clo_show_leak_kinds) &= ~R2S(Possible);
6148 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
6149 MC_(clo_workaround_gcc296_bugs)) {}
6151 else if VG_BINT_CLOM(cloPD, arg, "--freelist-vol", MC_(clo_freelist_vol),
6152 0, 10*1000*1000*1000LL) {}
6154 else if VG_BINT_CLOM(cloPD, arg, "--freelist-big-blocks",
6155 MC_(clo_freelist_big_blocks),
6156 0, 10*1000*1000*1000LL) {}
6158 else if VG_XACT_CLOM(cloPD, arg, "--leak-check=no",
6159 MC_(clo_leak_check), LC_Off) {}
6160 else if VG_XACT_CLOM(cloPD, arg, "--leak-check=summary",
6161 MC_(clo_leak_check), LC_Summary) {}
6162 else if VG_XACT_CLOM(cloPD, arg, "--leak-check=yes",
6163 MC_(clo_leak_check), LC_Full) {}
6164 else if VG_XACT_CLOM(cloPD, arg, "--leak-check=full",
6165 MC_(clo_leak_check), LC_Full) {}
6167 else if VG_XACT_CLO(arg, "--leak-resolution=low",
6168 MC_(clo_leak_resolution), Vg_LowRes) {}
6169 else if VG_XACT_CLO(arg, "--leak-resolution=med",
6170 MC_(clo_leak_resolution), Vg_MedRes) {}
6171 else if VG_XACT_CLO(arg, "--leak-resolution=high",
6172 MC_(clo_leak_resolution), Vg_HighRes) {}
6174 else if VG_STR_CLOM(cloPD, arg, "--ignore-ranges", tmp_str) {
6175 Bool ok = parse_ignore_ranges(tmp_str);
6176 if (!ok) {
6177 VG_(message)(Vg_DebugMsg,
6178 "ERROR: --ignore-ranges: "
6179 "invalid syntax, or end <= start in range\n");
6180 return False;
6182 if (gIgnoredAddressRanges) {
6183 UInt i;
6184 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
6185 UWord val = IAR_INVALID;
6186 UWord key_min = ~(UWord)0;
6187 UWord key_max = (UWord)0;
6188 VG_(indexRangeMap)( &key_min, &key_max, &val,
6189 gIgnoredAddressRanges, i );
6190 tl_assert(key_min <= key_max);
6191 UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
6192 if (key_max - key_min > limit && val == IAR_CommandLine) {
6193 VG_(message)(Vg_DebugMsg,
6194 "ERROR: --ignore-ranges: suspiciously large range:\n");
6195 VG_(message)(Vg_DebugMsg,
6196 " 0x%lx-0x%lx (size %lu)\n", key_min, key_max,
6197 key_max - key_min + 1);
6198 return False;
6204 else if VG_STR_CLOM(cloPD, arg, "--ignore-range-below-sp", tmp_str) {
6205 /* This seems at first a bit weird, but: in order to imply
6206 a non-wrapped-around address range, the first offset needs to be
6207 larger than the second one. For example
6208 --ignore-range-below-sp=8192-8189
6209 would cause accesses in the range [SP-8192, SP-8189] to be
6210 ignored. */
6211 UInt offs1 = 0, offs2 = 0;
6212 Bool ok = parse_UInt_pair(&tmp_str, &offs1, &offs2);
6213 // Ensure we used all the text after the '=' sign.
6214 if (ok && *tmp_str != 0) ok = False;
6215 if (!ok) {
6216 VG_(message)(Vg_DebugMsg,
6217 "ERROR: --ignore-range-below-sp: invalid syntax. "
6218 " Expected \"...=decimalnumber-decimalnumber\".\n");
6219 return False;
6221 if (offs1 > 1000*1000 /*arbitrary*/ || offs2 > 1000*1000 /*ditto*/) {
6222 VG_(message)(Vg_DebugMsg,
6223 "ERROR: --ignore-range-below-sp: suspiciously large "
6224 "offset(s): %u and %u\n", offs1, offs2);
6225 return False;
6227 if (offs1 <= offs2) {
6228 VG_(message)(Vg_DebugMsg,
6229 "ERROR: --ignore-range-below-sp: invalid offsets "
6230 "(the first must be larger): %u and %u\n", offs1, offs2);
6231 return False;
6233 tl_assert(offs1 > offs2);
6234 if (offs1 - offs2 > 4096 /*arbitrary*/) {
6235 VG_(message)(Vg_DebugMsg,
6236 "ERROR: --ignore-range-below-sp: suspiciously large "
6237 "range: %u-%u (size %u)\n", offs1, offs2, offs1 - offs2);
6238 return False;
6240 MC_(clo_ignore_range_below_sp) = True;
6241 MC_(clo_ignore_range_below_sp__first_offset) = offs1;
6242 MC_(clo_ignore_range_below_sp__last_offset) = offs2;
6243 return True;
6246 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
6247 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}
6249 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
6250 MC_(clo_keep_stacktraces), KS_alloc) {}
6251 else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
6252 MC_(clo_keep_stacktraces), KS_free) {}
6253 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
6254 MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
6255 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
6256 MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
6257 else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
6258 MC_(clo_keep_stacktraces), KS_none) {}
6260 else if VG_BOOL_CLOM(cloPD, arg, "--show-mismatched-frees",
6261 MC_(clo_show_mismatched_frees)) {}
6262 else if VG_BOOL_CLOM(cloPD, arg, "--show-realloc-size-zero",
6263 MC_(clo_show_realloc_size_zero)) {}
6265 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=no",
6266 MC_(clo_expensive_definedness_checks), EdcNO) {}
6267 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=auto",
6268 MC_(clo_expensive_definedness_checks), EdcAUTO) {}
6269 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=yes",
6270 MC_(clo_expensive_definedness_checks), EdcYES) {}
6272 else if VG_BOOL_CLO(arg, "--xtree-leak",
6273 MC_(clo_xtree_leak)) {}
6274 else if VG_STR_CLO (arg, "--xtree-leak-file",
6275 MC_(clo_xtree_leak_file)) {}
6277 else
6278 return VG_(replacement_malloc_process_cmd_line_option)(arg);
6280 return True;
6283 bad_level:
6284 VG_(fmsg_bad_option)(arg,
6285 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
6286 return False;
6289 static void mc_print_usage(void)
6291 VG_(printf)(
6292 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
6293 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
6294 " --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
6295 " [definite,possible]\n"
6296 " --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
6297 " [definite,possible]\n"
6298 " where kind is one of:\n"
6299 " definite indirect possible reachable all none\n"
6300 " --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
6301 " improving leak search false positive [all]\n"
6302 " where heur is one of:\n"
6303 " stdstring length64 newarray multipleinheritance all none\n"
6304 " --show-reachable=yes same as --show-leak-kinds=all\n"
6305 " --show-reachable=no --show-possibly-lost=yes\n"
6306 " same as --show-leak-kinds=definite,possible\n"
6307 " --show-reachable=no --show-possibly-lost=no\n"
6308 " same as --show-leak-kinds=definite\n"
6309 " --xtree-leak=no|yes output leak result in xtree format? [no]\n"
6310 " --xtree-leak-file=<file> xtree leak report file [xtleak.kcg.%%p]\n"
6311 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
6312 " --track-origins=no|yes show origins of undefined values? [no]\n"
6313 " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
6314 " --expensive-definedness-checks=no|auto|yes\n"
6315 " Use extra-precise definedness tracking [auto]\n"
6316 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
6317 " --freelist-big-blocks=<number> releases first blocks with size>= [1000000]\n"
6318 " --workaround-gcc296-bugs=no|yes self explanatory [no]. Deprecated.\n"
6319 " Use --ignore-range-below-sp instead.\n"
6320 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
6321 " --ignore-range-below-sp=<number>-<number> do not report errors for\n"
6322 " accesses at the given offsets below SP\n"
6323 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
6324 " --free-fill=<hexnumber> fill free'd areas with given value\n"
6325 " --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
6326 " stack trace(s) to keep for malloc'd/free'd areas [alloc-and-free]\n"
6327 " --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
6328 " --show-realloc-size-zero=no|yes show realocs with a size of zero? [yes]\n"
6332 static void mc_print_debug_usage(void)
6334 VG_(printf)(
6335 " (none)\n"
6340 /*------------------------------------------------------------*/
6341 /*--- Client blocks ---*/
6342 /*------------------------------------------------------------*/
6344 /* Client block management:
6346 This is managed as an expanding array of client block descriptors.
6347 Indices of live descriptors are issued to the client, so it can ask
6348 to free them later. Therefore we cannot slide live entries down
6349 over dead ones. Instead we must use free/inuse flags and scan for
6350 an empty slot at allocation time. This in turn means allocation is
6351 relatively expensive, so we hope this does not happen too often.
6353 An unused block has start == size == 0
6356 /* type CGenBlock is defined in mc_include.h */
6358 /* This subsystem is self-initialising. */
6359 static UWord cgb_size = 0;
6360 static UWord cgb_used = 0;
6361 static CGenBlock* cgbs = NULL;
6363 /* Stats for this subsystem. */
6364 static ULong cgb_used_MAX = 0; /* Max in use. */
6365 static ULong cgb_allocs = 0; /* Number of allocs. */
6366 static ULong cgb_discards = 0; /* Number of discards. */
6367 static ULong cgb_search = 0; /* Number of searches. */
6370 /* Get access to the client block array. */
6371 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
6372 /*OUT*/UWord* nBlocks )
6374 *blocks = cgbs;
6375 *nBlocks = cgb_used;
6379 static
6380 Int alloc_client_block ( void )
6382 UWord i, sz_new;
6383 CGenBlock* cgbs_new;
6385 cgb_allocs++;
6387 for (i = 0; i < cgb_used; i++) {
6388 cgb_search++;
6389 if (cgbs[i].start == 0 && cgbs[i].size == 0)
6390 return i;
6393 /* Not found. Try to allocate one at the end. */
6394 if (cgb_used < cgb_size) {
6395 cgb_used++;
6396 return cgb_used-1;
6399 /* Ok, we have to allocate a new one. */
6400 tl_assert(cgb_used == cgb_size);
6401 sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
6403 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
6404 for (i = 0; i < cgb_used; i++)
6405 cgbs_new[i] = cgbs[i];
6407 if (cgbs != NULL)
6408 VG_(free)( cgbs );
6409 cgbs = cgbs_new;
6411 cgb_size = sz_new;
6412 cgb_used++;
6413 if (cgb_used > cgb_used_MAX)
6414 cgb_used_MAX = cgb_used;
6415 return cgb_used-1;
6419 static void show_client_block_stats ( void )
6421 VG_(message)(Vg_DebugMsg,
6422 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6423 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
6426 static void print_monitor_help ( void )
6428 VG_(gdb_printf)
6430 "\n"
6431 "memcheck monitor commands:\n"
6432 " xb <addr> [<len>]\n"
6433 " prints validity bits for <len> (or 1) bytes at <addr>\n"
6434 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6435 " Then prints the bytes values below the corresponding validity bits\n"
6436 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6437 " Example: xb 0x8049c78 10\n"
6438 " get_vbits <addr> [<len>]\n"
6439 " Similar to xb, but only prints the validity bytes by group of 4.\n"
6440 " make_memory [noaccess|undefined\n"
6441 " |defined|Definedifaddressable] <addr> [<len>]\n"
6442 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6443 " check_memory [addressable|defined] <addr> [<len>]\n"
6444 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6445 " and outputs a description of <addr>\n"
6446 " leak_check [full*|summary|xtleak]\n"
6447 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6448 " [heuristics heur1,heur2,...]\n"
6449 " [new|increased*|changed|any]\n"
6450 " [unlimited*|limited <max_loss_records_output>]\n"
6451 " * = defaults\n"
6452 " xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
6453 " where kind is one of:\n"
6454 " definite indirect possible reachable all none\n"
6455 " where heur is one of:\n"
6456 " stdstring length64 newarray multipleinheritance all none*\n"
6457 " Examples: leak_check\n"
6458 " leak_check summary any\n"
6459 " leak_check full kinds indirect,possible\n"
6460 " leak_check full reachable any limited 100\n"
6461 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6462 " [unlimited*|limited <max_blocks>]\n"
6463 " [heuristics heur1,heur2,...]\n"
6464 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6465 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6466 " With heuristics, only shows the blocks found via heur1,heur2,...\n"
6467 " * = defaults\n"
6468 " who_points_at <addr> [<len>]\n"
6469 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6470 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6471 " with len > 1, will also show \"interior pointers\")\n"
6472 " xtmemory [<filename>]\n"
6473 " dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
6474 "\n");
6477 /* Print szB bytes at address, with a format similar to the gdb command
6478 x /<szB>xb address.
6479 res[i] == 1 indicates the corresponding byte is addressable. */
6480 static void gdb_xb (Addr address, SizeT szB, Int res[])
6482 UInt i;
6484 for (i = 0; i < szB; i++) {
6485 UInt bnr = i % 8;
6486 if (bnr == 0) {
6487 if (i != 0)
6488 VG_(printf) ("\n"); // Terminate previous line
6489 VG_(printf) ("%p:", (void*)(address+i));
6491 if (res[i] == 1)
6492 VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
6493 else
6494 VG_(printf) ("\t0x??");
6496 VG_(printf) ("\n"); // Terminate previous line
6500 /* Returns the address of the next non space character,
6501 or address of the string terminator. */
6502 static HChar* next_non_space (HChar *s)
6504 while (*s && *s == ' ')
6505 s++;
6506 return s;
6509 /* Parse an integer slice, i.e. a single integer or a range of integer.
6510 Syntax is:
6511 <integer>[..<integer> ]
6512 (spaces are allowed before and/or after ..).
6513 Return True if range correctly parsed, False otherwise. */
6514 static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
6515 UInt *from, UInt *to)
6517 HChar* wl;
6518 HChar *endptr;
6519 endptr = NULL;////
6520 wl = VG_(strtok_r) (s, " ", saveptr);
6522 /* slice must start with an integer. */
6523 if (wl == NULL) {
6524 VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
6525 return False;
6527 *from = VG_(strtoull10) (wl, &endptr);
6528 if (endptr == wl) {
6529 VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
6530 return False;
6533 if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
6534 /* wl token is an integer terminating the string
6535 or else next token does not start with .
6536 In both cases, the slice is a single integer. */
6537 *to = *from;
6538 return True;
6541 if (*endptr == '\0') {
6542 // iii .. => get the next token
6543 wl = VG_(strtok_r) (NULL, " .", saveptr);
6544 } else {
6545 // It must be iii..
6546 if (*endptr != '.' && *(endptr+1) != '.') {
6547 VG_(gdb_printf) ("expecting slice <from>..<to>\n");
6548 return False;
6550 if ( *(endptr+2) == ' ') {
6551 // It must be iii.. jjj => get the next token
6552 wl = VG_(strtok_r) (NULL, " .", saveptr);
6553 } else {
6554 // It must be iii..jjj
6555 wl = endptr+2;
6559 *to = VG_(strtoull10) (wl, &endptr);
6560 if (*endptr != '\0') {
6561 VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
6562 return False;
6565 if (*from > *to) {
6566 VG_(gdb_printf) ("<from> cannot be bigger than <to> "
6567 "in slice <from>..<to>\n");
6568 return False;
6571 return True;
6574 /* return True if request recognised, False otherwise */
6575 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
6577 HChar* wcmd;
6578 HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
6579 HChar *ssaveptr;
6581 VG_(strcpy) (s, req);
6583 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
6584 /* NB: if possible, avoid introducing a new command below which
6585 starts with the same first letter(s) as an already existing
6586 command. This ensures a shorter abbreviation for the user. */
6587 switch (VG_(keyword_id)
6588 ("help get_vbits leak_check make_memory check_memory "
6589 "block_list who_points_at xb xtmemory",
6590 wcmd, kwd_report_duplicated_matches)) {
6591 case -2: /* multiple matches */
6592 return True;
6593 case -1: /* not found */
6594 return False;
6595 case 0: /* help */
6596 print_monitor_help();
6597 return True;
6598 case 1: { /* get_vbits */
6599 Addr address;
6600 SizeT szB = 1;
6601 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6602 UChar vbits;
6603 Int i;
6604 Int unaddressable = 0;
6605 for (i = 0; i < szB; i++) {
6606 Int res = mc_get_or_set_vbits_for_client
6607 (address+i, (Addr) &vbits, 1,
6608 False, /* get them */
6609 False /* is client request */ );
6610 /* we are before the first character on next line, print a \n. */
6611 if ((i % 32) == 0 && i != 0)
6612 VG_(printf) ("\n");
6613 /* we are before the next block of 4 starts, print a space. */
6614 else if ((i % 4) == 0 && i != 0)
6615 VG_(printf) (" ");
6616 if (res == 1) {
6617 VG_(printf) ("%02x", vbits);
6618 } else {
6619 tl_assert(3 == res);
6620 unaddressable++;
6621 VG_(printf) ("__");
6624 VG_(printf) ("\n");
6625 if (unaddressable) {
6626 VG_(printf)
6627 ("Address %p len %lu has %d bytes unaddressable\n",
6628 (void *)address, szB, unaddressable);
6631 return True;
6633 case 2: { /* leak_check */
6634 Int err = 0;
6635 LeakCheckParams lcp;
6636 HChar* xt_filename = NULL;
6637 HChar* kw;
6639 lcp.mode = LC_Full;
6640 lcp.show_leak_kinds = R2S(Possible) | R2S(Unreached);
6641 lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
6642 lcp.heuristics = 0;
6643 lcp.deltamode = LCD_Increased;
6644 lcp.max_loss_records_output = 999999999;
6645 lcp.requested_by_monitor_command = True;
6646 lcp.xt_filename = NULL;
6648 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
6649 kw != NULL;
6650 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6651 switch (VG_(keyword_id)
6652 ("full summary xtleak "
6653 "kinds reachable possibleleak definiteleak "
6654 "heuristics "
6655 "new increased changed any "
6656 "unlimited limited ",
6657 kw, kwd_report_all)) {
6658 case -2: err++; break;
6659 case -1: err++; break;
6660 case 0: /* full */
6661 lcp.mode = LC_Full; break;
6662 case 1: /* summary */
6663 lcp.mode = LC_Summary; break;
6664 case 2: /* xtleak */
6665 lcp.mode = LC_Full;
6666 xt_filename
6667 = VG_(expand_file_name)("--xtleak-mc_main.c",
6668 "xtleak.kcg.%p.%n");
6669 lcp.xt_filename = xt_filename;
6670 break;
6671 case 3: { /* kinds */
6672 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6673 if (wcmd == NULL
6674 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
6675 True/*allow_all*/,
6676 wcmd,
6677 &lcp.show_leak_kinds)) {
6678 VG_(gdb_printf) ("missing or malformed leak kinds set\n");
6679 err++;
6681 break;
6683 case 4: /* reachable */
6684 lcp.show_leak_kinds = MC_(all_Reachedness)();
6685 break;
6686 case 5: /* possibleleak */
6687 lcp.show_leak_kinds
6688 = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
6689 break;
6690 case 6: /* definiteleak */
6691 lcp.show_leak_kinds = R2S(Unreached);
6692 break;
6693 case 7: { /* heuristics */
6694 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6695 if (wcmd == NULL
6696 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6697 True,/*allow_all*/
6698 wcmd,
6699 &lcp.heuristics)) {
6700 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6701 err++;
6703 break;
6705 case 8: /* new */
6706 lcp.deltamode = LCD_New; break;
6707 case 9: /* increased */
6708 lcp.deltamode = LCD_Increased; break;
6709 case 10: /* changed */
6710 lcp.deltamode = LCD_Changed; break;
6711 case 11: /* any */
6712 lcp.deltamode = LCD_Any; break;
6713 case 12: /* unlimited */
6714 lcp.max_loss_records_output = 999999999; break;
6715 case 13: { /* limited */
6716 Int int_value;
6717 const HChar* endptr;
6719 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6720 if (wcmd == NULL) {
6721 int_value = 0;
6722 endptr = "empty"; /* to report an error below */
6723 } else {
6724 HChar *the_end;
6725 int_value = VG_(strtoll10) (wcmd, &the_end);
6726 endptr = the_end;
6728 if (*endptr != '\0')
6729 VG_(gdb_printf) ("missing or malformed integer value\n");
6730 else if (int_value > 0)
6731 lcp.max_loss_records_output = (UInt) int_value;
6732 else
6733 VG_(gdb_printf) ("max_loss_records_output must be >= 1,"
6734 " got %d\n", int_value);
6735 break;
6737 default:
6738 tl_assert (0);
6741 if (!err)
6742 MC_(detect_memory_leaks)(tid, &lcp);
6743 if (xt_filename != NULL)
6744 VG_(free)(xt_filename);
6745 return True;
6748 case 3: { /* make_memory */
6749 Addr address;
6750 SizeT szB = 1;
6751 Int kwdid = VG_(keyword_id)
6752 ("noaccess undefined defined Definedifaddressable",
6753 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6754 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6755 return True;
6756 switch (kwdid) {
6757 case -2: break;
6758 case -1: break;
6759 case 0: MC_(make_mem_noaccess) (address, szB); break;
6760 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
6761 MC_OKIND_USER ); break;
6762 case 2: MC_(make_mem_defined) ( address, szB ); break;
6763 case 3: make_mem_defined_if_addressable ( address, szB ); break;;
6764 default: tl_assert(0);
6766 return True;
6769 case 4: { /* check_memory */
6770 Addr address;
6771 SizeT szB = 1;
6772 Addr bad_addr;
6773 UInt okind;
6774 const HChar* src;
6775 UInt otag;
6776 UInt ecu;
6777 ExeContext* origin_ec;
6778 MC_ReadResult res;
6780 Int kwdid = VG_(keyword_id)
6781 ("addressable defined",
6782 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6783 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6784 return True;
6785 switch (kwdid) {
6786 case -2: break;
6787 case -1: break;
6788 case 0: /* addressable */
6789 if (is_mem_addressable ( address, szB, &bad_addr ))
6790 VG_(printf) ("Address %p len %lu addressable\n",
6791 (void *)address, szB);
6792 else
6793 VG_(printf)
6794 ("Address %p len %lu not addressable:\nbad address %p\n",
6795 (void *)address, szB, (void *) bad_addr);
6796 // Describe this (probably live) address with current epoch
6797 MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
6798 break;
6799 case 1: /* defined */
6800 res = is_mem_defined ( address, szB, &bad_addr, &otag );
6801 if (MC_AddrErr == res)
6802 VG_(printf)
6803 ("Address %p len %lu not addressable:\nbad address %p\n",
6804 (void *)address, szB, (void *) bad_addr);
6805 else if (MC_ValueErr == res) {
6806 okind = otag & 3;
6807 switch (okind) {
6808 case MC_OKIND_STACK:
6809 src = " was created by a stack allocation"; break;
6810 case MC_OKIND_HEAP:
6811 src = " was created by a heap allocation"; break;
6812 case MC_OKIND_USER:
6813 src = " was created by a client request"; break;
6814 case MC_OKIND_UNKNOWN:
6815 src = ""; break;
6816 default: tl_assert(0);
6818 VG_(printf)
6819 ("Address %p len %lu not defined:\n"
6820 "Uninitialised value at %p%s\n",
6821 (void *)address, szB, (void *) bad_addr, src);
6822 ecu = otag & ~3;
6823 if (VG_(is_plausible_ECU)(ecu)) {
6824 origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
6825 VG_(pp_ExeContext)( origin_ec );
6828 else
6829 VG_(printf) ("Address %p len %lu defined\n",
6830 (void *)address, szB);
6831 // Describe this (probably live) address with current epoch
6832 MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
6833 break;
6834 default: tl_assert(0);
6836 return True;
6839 case 5: { /* block_list */
6840 HChar* wl;
6841 HChar *the_end;
6842 UInt lr_nr_from = 0;
6843 UInt lr_nr_to = 0;
6845 if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
6846 UInt limit_blocks = 999999999;
6847 Int int_value;
6848 UInt heuristics = 0;
6850 for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
6851 wl != NULL;
6852 wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6853 switch (VG_(keyword_id) ("unlimited limited heuristics ",
6854 wl, kwd_report_all)) {
6855 case -2: return True;
6856 case -1: return True;
6857 case 0: /* unlimited */
6858 limit_blocks = 999999999; break;
6859 case 1: /* limited */
6860 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6861 if (wcmd == NULL) {
6862 VG_(gdb_printf) ("missing integer value\n");
6863 return True;
6865 int_value = VG_(strtoll10) (wcmd, &the_end);
6866 if (*the_end != '\0') {
6867 VG_(gdb_printf) ("malformed integer value\n");
6868 return True;
6870 if (int_value <= 0) {
6871 VG_(gdb_printf) ("max_blocks must be >= 1,"
6872 " got %d\n", int_value);
6873 return True;
6875 limit_blocks = (UInt) int_value;
6876 break;
6877 case 2: /* heuristics */
6878 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6879 if (wcmd == NULL
6880 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6881 True,/*allow_all*/
6882 wcmd,
6883 &heuristics)) {
6884 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6885 return True;
6887 break;
6888 default:
6889 tl_assert (0);
6892 /* substract 1 from lr_nr_from/lr_nr_to as what is shown to the user
6893 is 1 more than the index in lr_array. */
6894 if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
6895 lr_nr_to-1,
6896 limit_blocks,
6897 heuristics))
6898 VG_(gdb_printf) ("invalid loss record nr\n");
6900 return True;
6903 case 6: { /* who_points_at */
6904 Addr address;
6905 SizeT szB = 1;
6907 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6908 return True;
6909 if (address == (Addr) 0) {
6910 VG_(gdb_printf) ("Cannot search who points at 0x0\n");
6911 return True;
6913 MC_(who_points_at) (address, szB);
6914 return True;
6917 case 7: { /* xb */
6918 Addr address;
6919 SizeT szB = 1;
6920 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6921 UChar vbits[8];
6922 Int res[8];
6923 Int i;
6924 Int unaddressable = 0;
6925 for (i = 0; i < szB; i++) {
6926 Int bnr = i % 8;
6927 /* We going to print the first vabits of a new line.
6928 Terminate the previous line if needed: prints a line with the
6929 address and the data. */
6930 if (bnr == 0) {
6931 if (i != 0) {
6932 VG_(printf) ("\n");
6933 gdb_xb (address + i - 8, 8, res);
6935 VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
6937 res[bnr] = mc_get_or_set_vbits_for_client
6938 (address+i, (Addr) &vbits[bnr], 1,
6939 False, /* get them */
6940 False /* is client request */ );
6941 if (res[bnr] == 1) {
6942 VG_(printf) ("\t %02x", vbits[bnr]);
6943 } else {
6944 tl_assert(3 == res[bnr]);
6945 unaddressable++;
6946 VG_(printf) ("\t __");
6949 VG_(printf) ("\n");
6950 if (szB % 8 == 0 && szB > 0)
6951 gdb_xb (address + szB - 8, 8, res);
6952 else
6953 gdb_xb (address + szB - szB % 8, szB % 8, res);
6954 if (unaddressable) {
6955 VG_(printf)
6956 ("Address %p len %lu has %d bytes unaddressable\n",
6957 (void *)address, szB, unaddressable);
6960 return True;
6963 case 8: { /* xtmemory */
6964 HChar* filename;
6965 filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
6966 MC_(xtmemory_report)(filename, False);
6967 return True;
6970 default:
6971 tl_assert(0);
6972 return False;
6976 /*------------------------------------------------------------*/
6977 /*--- Client requests ---*/
6978 /*------------------------------------------------------------*/
6980 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
6982 Int i;
6983 Addr bad_addr;
6984 MC_Chunk* mc = NULL;
6986 if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
6987 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
6988 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
6989 && VG_USERREQ__FREELIKE_BLOCK != arg[0]
6990 && VG_USERREQ__CREATE_MEMPOOL != arg[0]
6991 && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
6992 && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
6993 && VG_USERREQ__MEMPOOL_FREE != arg[0]
6994 && VG_USERREQ__MEMPOOL_TRIM != arg[0]
6995 && VG_USERREQ__MOVE_MEMPOOL != arg[0]
6996 && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
6997 && VG_USERREQ__MEMPOOL_EXISTS != arg[0]
6998 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]
6999 && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
7000 && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
7001 return False;
7003 switch (arg[0]) {
7004 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
7005 Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
7006 if (!ok)
7007 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
7008 *ret = ok ? (UWord)NULL : bad_addr;
7009 break;
7012 case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
7013 Bool errorV = False;
7014 Addr bad_addrV = 0;
7015 UInt otagV = 0;
7016 Bool errorA = False;
7017 Addr bad_addrA = 0;
7018 is_mem_defined_comprehensive(
7019 arg[1], arg[2],
7020 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
7022 if (errorV) {
7023 MC_(record_user_error) ( tid, bad_addrV,
7024 /*isAddrErr*/False, otagV );
7026 if (errorA) {
7027 MC_(record_user_error) ( tid, bad_addrA,
7028 /*isAddrErr*/True, 0 );
7030 /* Return the lower of the two erring addresses, if any. */
7031 *ret = 0;
7032 if (errorV && !errorA) {
7033 *ret = bad_addrV;
7035 if (!errorV && errorA) {
7036 *ret = bad_addrA;
7038 if (errorV && errorA) {
7039 *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
7041 break;
7044 case VG_USERREQ__DO_LEAK_CHECK: {
7045 LeakCheckParams lcp;
7047 if (arg[1] == 0)
7048 lcp.mode = LC_Full;
7049 else if (arg[1] == 1)
7050 lcp.mode = LC_Summary;
7051 else {
7052 VG_(message)(Vg_UserMsg,
7053 "Warning: unknown memcheck leak search mode\n");
7054 lcp.mode = LC_Full;
7057 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
7058 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
7059 lcp.heuristics = MC_(clo_leak_check_heuristics);
7061 if (arg[2] == 0)
7062 lcp.deltamode = LCD_Any;
7063 else if (arg[2] == 1)
7064 lcp.deltamode = LCD_Increased;
7065 else if (arg[2] == 2)
7066 lcp.deltamode = LCD_Changed;
7067 else if (arg[2] == 3)
7068 lcp.deltamode = LCD_New;
7069 else {
7070 VG_(message)
7071 (Vg_UserMsg,
7072 "Warning: unknown memcheck leak search deltamode\n");
7073 lcp.deltamode = LCD_Any;
7075 lcp.max_loss_records_output = 999999999;
7076 lcp.requested_by_monitor_command = False;
7077 lcp.xt_filename = NULL;
7079 MC_(detect_memory_leaks)(tid, &lcp);
7080 *ret = 0; /* return value is meaningless */
7081 break;
7084 case VG_USERREQ__MAKE_MEM_NOACCESS:
7085 MC_(make_mem_noaccess) ( arg[1], arg[2] );
7086 *ret = -1;
7087 break;
7089 case VG_USERREQ__MAKE_MEM_UNDEFINED:
7090 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
7091 MC_OKIND_USER );
7092 *ret = -1;
7093 break;
7095 case VG_USERREQ__MAKE_MEM_DEFINED:
7096 MC_(make_mem_defined) ( arg[1], arg[2] );
7097 *ret = -1;
7098 break;
7100 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
7101 make_mem_defined_if_addressable ( arg[1], arg[2] );
7102 *ret = -1;
7103 break;
7105 case VG_USERREQ__CREATE_BLOCK: /* describe a block */
7106 if (arg[1] != 0 && arg[2] != 0) {
7107 i = alloc_client_block();
7108 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
7109 cgbs[i].start = arg[1];
7110 cgbs[i].size = arg[2];
7111 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
7112 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
7113 *ret = i;
7114 } else
7115 *ret = -1;
7116 break;
7118 case VG_USERREQ__DISCARD: /* discard */
7119 if (cgbs == NULL
7120 || arg[2] >= cgb_used ||
7121 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
7122 *ret = 1;
7123 } else {
7124 tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
7125 cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
7126 VG_(free)(cgbs[arg[2]].desc);
7127 cgb_discards++;
7128 *ret = 0;
7130 break;
7132 case VG_USERREQ__GET_VBITS:
7133 *ret = mc_get_or_set_vbits_for_client
7134 ( arg[1], arg[2], arg[3],
7135 False /* get them */,
7136 True /* is client request */ );
7137 break;
7139 case VG_USERREQ__SET_VBITS:
7140 *ret = mc_get_or_set_vbits_for_client
7141 ( arg[1], arg[2], arg[3],
7142 True /* set them */,
7143 True /* is client request */ );
7144 break;
7146 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
7147 UWord** argp = (UWord**)arg;
7148 // MC_(bytes_leaked) et al were set by the last leak check (or zero
7149 // if no prior leak checks performed).
7150 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
7151 *argp[2] = MC_(bytes_dubious);
7152 *argp[3] = MC_(bytes_reachable);
7153 *argp[4] = MC_(bytes_suppressed);
7154 // there is no argp[5]
7155 //*argp[5] = MC_(bytes_indirect);
7156 // XXX need to make *argp[1-4] defined; currently done in the
7157 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
7158 *ret = 0;
7159 return True;
7161 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
7162 UWord** argp = (UWord**)arg;
7163 // MC_(blocks_leaked) et al were set by the last leak check (or zero
7164 // if no prior leak checks performed).
7165 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
7166 *argp[2] = MC_(blocks_dubious);
7167 *argp[3] = MC_(blocks_reachable);
7168 *argp[4] = MC_(blocks_suppressed);
7169 // there is no argp[5]
7170 //*argp[5] = MC_(blocks_indirect);
7171 // XXX need to make *argp[1-4] defined; currently done in the
7172 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
7173 *ret = 0;
7174 return True;
7176 case VG_USERREQ__MALLOCLIKE_BLOCK: {
7177 Addr p = (Addr)arg[1];
7178 SizeT sizeB = arg[2];
7179 UInt rzB = arg[3];
7180 Bool is_zeroed = (Bool)arg[4];
7182 MC_(new_block) ( tid, p, sizeB, /*ignored*/0U, 0U, is_zeroed,
7183 MC_AllocCustom, MC_(malloc_list) );
7184 if (rzB > 0) {
7185 MC_(make_mem_noaccess) ( p - rzB, rzB);
7186 MC_(make_mem_noaccess) ( p + sizeB, rzB);
7188 return True;
7190 case VG_USERREQ__RESIZEINPLACE_BLOCK: {
7191 Addr p = (Addr)arg[1];
7192 SizeT oldSizeB = arg[2];
7193 SizeT newSizeB = arg[3];
7194 UInt rzB = arg[4];
7196 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
7197 return True;
7199 case VG_USERREQ__FREELIKE_BLOCK: {
7200 Addr p = (Addr)arg[1];
7201 UInt rzB = arg[2];
7203 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
7204 return True;
7207 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
7208 HChar* s = (HChar*)arg[1];
7209 Addr dst = (Addr) arg[2];
7210 Addr src = (Addr) arg[3];
7211 SizeT len = (SizeT)arg[4];
7212 MC_(record_overlap_error)(tid, s, src, dst, len);
7213 return True;
7216 case _VG_USERREQ__MEMCHECK_VERIFY_ALIGNMENT: {
7217 struct AlignedAllocInfo *aligned_alloc_info = (struct AlignedAllocInfo *)arg[1];
7218 tl_assert(aligned_alloc_info);
7220 switch (aligned_alloc_info->alloc_kind) {
7221 case AllocKindMemalign:
7222 // other platforms just ensure it is a power of 2
7223 // ignore Illumos only enforcing multiple of 4 (probably a bug)
7224 if (aligned_alloc_info->orig_alignment == 0U ||
7225 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7226 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be power of 2)" );
7228 // size zero not allowed on all platforms (e.g. Illumos)
7229 if (aligned_alloc_info->size == 0) {
7230 MC_(record_bad_size) ( tid, aligned_alloc_info->size, "memalign()" );
7232 break;
7233 case AllocKindPosixMemalign:
7234 // must be power of 2
7235 // alignment at least sizeof(size_t)
7236 // size of 0 implementation defined
7237 if (aligned_alloc_info->orig_alignment < sizeof(SizeT) ||
7238 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7239 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero, a power of 2 and a multiple of sizeof(void*))" );
7241 if (aligned_alloc_info->size == 0) {
7242 MC_(record_bad_size) ( tid, aligned_alloc_info->size, "posix_memalign()" );
7244 break;
7245 case AllocKindAlignedAlloc:
7246 // must be power of 2
7247 if ((aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7248 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be a power of 2)" );
7250 // size should be integral multiple of alignment
7251 if (aligned_alloc_info->orig_alignment &&
7252 aligned_alloc_info->size % aligned_alloc_info->orig_alignment != 0U) {
7253 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , aligned_alloc_info->size, " (size should be a multiple of alignment)" );
7255 if (aligned_alloc_info->size == 0) {
7256 MC_(record_bad_size) ( tid, aligned_alloc_info->size, "aligned_alloc()" );
7258 break;
7259 case AllocKindDeleteSized:
7260 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7261 if (mc && mc->szB != aligned_alloc_info->size) {
7262 MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new/delete" );
7264 break;
7265 case AllocKindVecDeleteSized:
7266 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7267 if (mc && mc->szB != aligned_alloc_info->size) {
7268 MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new[][/delete[]" );
7270 break;
7271 case AllocKindNewAligned:
7272 if (aligned_alloc_info->orig_alignment == 0 ||
7273 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7274 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7276 break;
7277 case AllocKindVecNewAligned:
7278 if (aligned_alloc_info->orig_alignment == 0 ||
7279 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7280 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7282 break;
7283 case AllocKindDeleteAligned:
7284 if (aligned_alloc_info->orig_alignment == 0 ||
7285 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7286 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7288 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7289 if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
7290 MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, "new/delete");
7292 break;
7293 case AllocKindVecDeleteAligned:
7294 if (aligned_alloc_info->orig_alignment == 0 ||
7295 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7296 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7298 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7299 if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
7300 MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, "new[]/delete[]");
7302 break;
7303 case AllocKindDeleteSizedAligned:
7304 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7305 if (mc && mc->szB != aligned_alloc_info->size) {
7306 MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new/delete");
7308 if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
7309 MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, "new/delete");
7311 if (aligned_alloc_info->orig_alignment == 0 ||
7312 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7313 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7315 break;
7316 case AllocKindVecDeleteSizedAligned:
7317 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7318 if (mc && mc->szB != aligned_alloc_info->size) {
7319 MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new[]/delete[]" );
7321 if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
7322 MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, "new[]/delete[]");
7324 if (aligned_alloc_info->orig_alignment == 0 ||
7325 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7326 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7328 break;
7329 default:
7330 tl_assert (False);
7333 return True;
7336 case VG_USERREQ__CREATE_MEMPOOL: {
7337 Addr pool = (Addr)arg[1];
7338 UInt rzB = arg[2];
7339 Bool is_zeroed = (Bool)arg[3];
7340 UInt flags = arg[4];
7342 // The create_mempool function does not know these mempool flags,
7343 // pass as booleans.
7344 MC_(create_mempool) ( pool, rzB, is_zeroed,
7345 (flags & VALGRIND_MEMPOOL_AUTO_FREE),
7346 (flags & VALGRIND_MEMPOOL_METAPOOL) );
7347 return True;
7350 case VG_USERREQ__DESTROY_MEMPOOL: {
7351 Addr pool = (Addr)arg[1];
7353 MC_(destroy_mempool) ( pool );
7354 return True;
7357 case VG_USERREQ__MEMPOOL_ALLOC: {
7358 Addr pool = (Addr)arg[1];
7359 Addr addr = (Addr)arg[2];
7360 UInt size = arg[3];
7362 MC_(mempool_alloc) ( tid, pool, addr, size );
7363 return True;
7366 case VG_USERREQ__MEMPOOL_FREE: {
7367 Addr pool = (Addr)arg[1];
7368 Addr addr = (Addr)arg[2];
7370 MC_(mempool_free) ( pool, addr );
7371 return True;
7374 case VG_USERREQ__MEMPOOL_TRIM: {
7375 Addr pool = (Addr)arg[1];
7376 Addr addr = (Addr)arg[2];
7377 UInt size = arg[3];
7379 MC_(mempool_trim) ( pool, addr, size );
7380 return True;
7383 case VG_USERREQ__MOVE_MEMPOOL: {
7384 Addr poolA = (Addr)arg[1];
7385 Addr poolB = (Addr)arg[2];
7387 MC_(move_mempool) ( poolA, poolB );
7388 return True;
7391 case VG_USERREQ__MEMPOOL_CHANGE: {
7392 Addr pool = (Addr)arg[1];
7393 Addr addrA = (Addr)arg[2];
7394 Addr addrB = (Addr)arg[3];
7395 UInt size = arg[4];
7397 MC_(mempool_change) ( pool, addrA, addrB, size );
7398 return True;
7401 case VG_USERREQ__MEMPOOL_EXISTS: {
7402 Addr pool = (Addr)arg[1];
7404 *ret = (UWord) MC_(mempool_exists) ( pool );
7405 return True;
7408 case VG_USERREQ__GDB_MONITOR_COMMAND: {
7409 Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
7410 if (handled)
7411 *ret = 1;
7412 else
7413 *ret = 0;
7414 return handled;
7417 case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
7418 case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
7419 Bool addRange
7420 = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
7421 Bool ok
7422 = modify_ignore_ranges(addRange, arg[1], arg[2]);
7423 *ret = ok ? 1 : 0;
7424 return True;
7427 default:
7428 VG_(message)(
7429 Vg_UserMsg,
7430 "Warning: unknown memcheck client request code %llx\n",
7431 (ULong)arg[0]
7433 return False;
7435 return True;
7439 /*------------------------------------------------------------*/
7440 /*--- Crude profiling machinery. ---*/
7441 /*------------------------------------------------------------*/
7443 // We track a number of interesting events (using PROF_EVENT)
7444 // if MC_PROFILE_MEMORY is defined.
7446 #ifdef MC_PROFILE_MEMORY
7448 ULong MC_(event_ctr)[MCPE_LAST];
7450 /* Event counter names. Use the name of the function that increases the
7451 event counter. Drop any MC_() and mc_ prefices. */
7452 static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
7453 [MCPE_LOADVN_SLOW] = "LOADVn_slow",
7454 [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
7455 [MCPE_STOREVN_SLOW] = "STOREVn_slow",
7456 [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
7457 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
7458 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
7459 "make_aligned_word32_undefined_slow",
7460 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
7461 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
7462 "make_aligned_word64_undefined_slow",
7463 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
7464 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
7465 "make_aligned_word32_noaccess_slow",
7466 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
7467 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
7468 "make_aligned_word64_noaccess_slow",
7469 [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
7470 [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
7471 [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
7472 [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
7473 [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
7474 [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
7475 [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
7476 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
7477 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
7478 [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
7479 [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
7480 [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
7481 [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
7482 [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
7483 [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
7484 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
7485 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
7486 "is_mem_defined_comprehensive(loop)",
7487 [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
7488 [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
7489 [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
7490 [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
7491 [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
7492 [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
7493 "set_address_range_perms(single-secmap)",
7494 [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
7495 "set_address_range_perms(startof-secmap)",
7496 [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
7497 "set_address_range_perms(multiple-secmaps)",
7498 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
7499 "set_address_range_perms(dist-sm1)",
7500 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
7501 "set_address_range_perms(dist-sm2)",
7502 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
7503 "set_address_range_perms(dist-sm1-quick)",
7504 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
7505 "set_address_range_perms(dist-sm2-quick)",
7506 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
7507 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
7508 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
7509 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
7510 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
7511 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
7512 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
7513 "set_address_range_perms(loop64K-free-dist-sm)",
7514 [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
7515 [MCPE_LOADV_128_OR_256] = "LOADV_128_or_256",
7516 [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
7517 [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
7518 [MCPE_LOADV64] = "LOADV64",
7519 [MCPE_LOADV64_SLOW1] = "LOADV64-slow1",
7520 [MCPE_LOADV64_SLOW2] = "LOADV64-slow2",
7521 [MCPE_STOREV64] = "STOREV64",
7522 [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
7523 [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
7524 [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
7525 [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
7526 [MCPE_LOADV32] = "LOADV32",
7527 [MCPE_LOADV32_SLOW1] = "LOADV32-slow1",
7528 [MCPE_LOADV32_SLOW2] = "LOADV32-slow2",
7529 [MCPE_STOREV32] = "STOREV32",
7530 [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
7531 [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
7532 [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
7533 [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
7534 [MCPE_LOADV16] = "LOADV16",
7535 [MCPE_LOADV16_SLOW1] = "LOADV16-slow1",
7536 [MCPE_LOADV16_SLOW2] = "LOADV16-slow2",
7537 [MCPE_STOREV16] = "STOREV16",
7538 [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
7539 [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
7540 [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
7541 [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
7542 [MCPE_LOADV8] = "LOADV8",
7543 [MCPE_LOADV8_SLOW1] = "LOADV8-slow1",
7544 [MCPE_LOADV8_SLOW2] = "LOADV8-slow2",
7545 [MCPE_STOREV8] = "STOREV8",
7546 [MCPE_STOREV8_SLOW1] = "STOREV8-slow1",
7547 [MCPE_STOREV8_SLOW2] = "STOREV8-slow2",
7548 [MCPE_STOREV8_SLOW3] = "STOREV8-slow3",
7549 [MCPE_STOREV8_SLOW4] = "STOREV8-slow4",
7550 [MCPE_NEW_MEM_STACK_4] = "new_mem_stack_4",
7551 [MCPE_NEW_MEM_STACK_8] = "new_mem_stack_8",
7552 [MCPE_NEW_MEM_STACK_12] = "new_mem_stack_12",
7553 [MCPE_NEW_MEM_STACK_16] = "new_mem_stack_16",
7554 [MCPE_NEW_MEM_STACK_32] = "new_mem_stack_32",
7555 [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
7556 [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
7557 [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
7558 [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
7559 [MCPE_DIE_MEM_STACK_4] = "die_mem_stack_4",
7560 [MCPE_DIE_MEM_STACK_8] = "die_mem_stack_8",
7561 [MCPE_DIE_MEM_STACK_12] = "die_mem_stack_12",
7562 [MCPE_DIE_MEM_STACK_16] = "die_mem_stack_16",
7563 [MCPE_DIE_MEM_STACK_32] = "die_mem_stack_32",
7564 [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
7565 [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
7566 [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
7567 [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
7568 [MCPE_NEW_MEM_STACK] = "new_mem_stack",
7569 [MCPE_DIE_MEM_STACK] = "die_mem_stack",
7570 [MCPE_MAKE_STACK_UNINIT_W_O] = "MAKE_STACK_UNINIT_w_o",
7571 [MCPE_MAKE_STACK_UNINIT_NO_O] = "MAKE_STACK_UNINIT_no_o",
7572 [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
7573 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
7574 = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
7575 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
7576 = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
7577 [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
7578 = "MAKE_STACK_UNINIT_128_no_o_slowcase",
7581 static void init_prof_mem ( void )
7583 Int i, name_count = 0;
7585 for (i = 0; i < MCPE_LAST; i++) {
7586 MC_(event_ctr)[i] = 0;
7587 if (MC_(event_ctr_name)[i] != NULL)
7588 ++name_count;
7591 /* Make sure every profiling event has a name */
7592 tl_assert(name_count == MCPE_LAST);
7595 static void done_prof_mem ( void )
7597 Int i, n;
7598 Bool spaced = False;
7599 for (i = n = 0; i < MCPE_LAST; i++) {
7600 if (!spaced && (n % 10) == 0) {
7601 VG_(printf)("\n");
7602 spaced = True;
7604 if (MC_(event_ctr)[i] > 0) {
7605 spaced = False;
7606 ++n;
7607 VG_(printf)( "prof mem event %3d: %11llu %s\n",
7608 i, MC_(event_ctr)[i],
7609 MC_(event_ctr_name)[i]);
7614 #else
/* Variants used when MC_PROFILE_MEMORY is not defined: there are no
   counters, so both hooks do nothing. */
static void init_prof_mem ( void )
{
}

static void done_prof_mem ( void )
{
}
7619 #endif
7622 /*------------------------------------------------------------*/
7623 /*--- Origin tracking stuff ---*/
7624 /*------------------------------------------------------------*/
7626 /*--------------------------------------------*/
7627 /*--- Origin tracking: load handlers ---*/
7628 /*--------------------------------------------*/
7630 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
7631 return or1 > or2 ? or1 : or2;
7634 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
7635 OCacheLine* line;
7636 UChar descr;
7637 UWord lineoff = oc_line_offset(a);
7638 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7640 if (OC_ENABLE_ASSERTIONS) {
7641 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7644 line = find_OCacheLine( a );
7646 descr = line->u.main.descr[lineoff];
7647 if (OC_ENABLE_ASSERTIONS) {
7648 tl_assert(descr < 0x10);
7651 if (LIKELY(0 == (descr & (1 << byteoff)))) {
7652 return 0;
7653 } else {
7654 return line->u.main.w32[lineoff];
7658 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
7659 OCacheLine* line;
7660 UChar descr;
7661 UWord lineoff, byteoff;
7663 if (UNLIKELY(a & 1)) {
7664 /* Handle misaligned case, slowly. */
7665 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
7666 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
7667 return merge_origins(oLo, oHi);
7670 lineoff = oc_line_offset(a);
7671 byteoff = a & 3; /* 0 or 2 */
7673 if (OC_ENABLE_ASSERTIONS) {
7674 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7676 line = find_OCacheLine( a );
7678 descr = line->u.main.descr[lineoff];
7679 if (OC_ENABLE_ASSERTIONS) {
7680 tl_assert(descr < 0x10);
7683 if (LIKELY(0 == (descr & (3 << byteoff)))) {
7684 return 0;
7685 } else {
7686 return line->u.main.w32[lineoff];
7690 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
7691 OCacheLine* line;
7692 UChar descr;
7693 UWord lineoff;
7695 if (UNLIKELY(a & 3)) {
7696 /* Handle misaligned case, slowly. */
7697 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
7698 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
7699 return merge_origins(oLo, oHi);
7702 lineoff = oc_line_offset(a);
7703 if (OC_ENABLE_ASSERTIONS) {
7704 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7707 line = find_OCacheLine( a );
7709 descr = line->u.main.descr[lineoff];
7710 if (OC_ENABLE_ASSERTIONS) {
7711 tl_assert(descr < 0x10);
7714 if (LIKELY(0 == descr)) {
7715 return 0;
7716 } else {
7717 return line->u.main.w32[lineoff];
7721 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
7722 OCacheLine* line;
7723 UChar descrLo, descrHi, descr;
7724 UWord lineoff;
7726 if (UNLIKELY(a & 7)) {
7727 /* Handle misaligned case, slowly. */
7728 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
7729 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
7730 return merge_origins(oLo, oHi);
7733 lineoff = oc_line_offset(a);
7734 if (OC_ENABLE_ASSERTIONS) {
7735 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7738 line = find_OCacheLine( a );
7740 descrLo = line->u.main.descr[lineoff + 0];
7741 descrHi = line->u.main.descr[lineoff + 1];
7742 descr = descrLo | descrHi;
7743 if (OC_ENABLE_ASSERTIONS) {
7744 tl_assert(descr < 0x10);
7747 if (LIKELY(0 == descr)) {
7748 return 0; /* both 32-bit chunks are defined */
7749 } else {
7750 UInt oLo = descrLo == 0 ? 0 : line->u.main.w32[lineoff + 0];
7751 UInt oHi = descrHi == 0 ? 0 : line->u.main.w32[lineoff + 1];
7752 return merge_origins(oLo, oHi);
7756 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
7757 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 );
7758 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 );
7759 UInt oBoth = merge_origins(oLo, oHi);
7760 return (UWord)oBoth;
7763 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
7764 UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 );
7765 UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 );
7766 UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 );
7767 UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 );
7768 UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
7769 merge_origins(oQ2, oQ3));
7770 return (UWord)oAll;
7774 /*--------------------------------------------*/
7775 /*--- Origin tracking: store handlers ---*/
7776 /*--------------------------------------------*/
7778 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
7779 OCacheLine* line;
7780 UWord lineoff = oc_line_offset(a);
7781 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7783 if (OC_ENABLE_ASSERTIONS) {
7784 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7787 line = find_OCacheLine( a );
7789 #if OC_PRECISION_STORE
7790 if (LIKELY(d32 == 0)) {
7791 // The byte is defined. Just mark it as so in the descr and leave the w32
7792 // unchanged. This may make the descr become zero, so the line no longer
7793 // contains useful info, but that's OK. No loss of information.
7794 line->u.main.descr[lineoff] &= ~(1 << byteoff);
7795 } else if (d32 == line->u.main.w32[lineoff]) {
7796 // At least one of the four bytes in the w32 is undefined with the same
7797 // origin. Just extend the mask. No loss of information.
7798 line->u.main.descr[lineoff] |= (1 << byteoff);
7799 } else {
7800 // Here, we have a conflict: at least one byte in the group is undefined
7801 // but with some other origin. We can't represent both origins, so we
7802 // forget about the previous origin and install this one instead.
7803 line->u.main.descr[lineoff] = (1 << byteoff);
7804 line->u.main.w32[lineoff] = d32;
7806 #else
7807 if (d32 == 0) {
7808 line->u.main.descr[lineoff] &= ~(1 << byteoff);
7809 } else {
7810 line->u.main.descr[lineoff] |= (1 << byteoff);
7811 line->u.main.w32[lineoff] = d32;
7813 #endif
7816 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
7817 OCacheLine* line;
7818 UWord lineoff, byteoff;
7820 if (UNLIKELY(a & 1)) {
7821 /* Handle misaligned case, slowly. */
7822 MC_(helperc_b_store1)( a + 0, d32 );
7823 MC_(helperc_b_store1)( a + 1, d32 );
7824 return;
7827 lineoff = oc_line_offset(a);
7828 byteoff = a & 3; /* 0 or 2 */
7830 if (OC_ENABLE_ASSERTIONS) {
7831 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7834 line = find_OCacheLine( a );
7836 #if OC_PRECISION_STORE
7837 // Same logic as in the store1 case above.
7838 if (LIKELY(d32 == 0)) {
7839 line->u.main.descr[lineoff] &= ~(3 << byteoff);
7840 } else if (d32 == line->u.main.w32[lineoff]) {
7841 line->u.main.descr[lineoff] |= (3 << byteoff);
7842 line->u.main.w32[lineoff] = d32;
7843 } else {
7844 line->u.main.descr[lineoff] = (3 << byteoff);
7845 line->u.main.w32[lineoff] = d32;
7847 #else
7848 if (d32 == 0) {
7849 line->u.main.descr[lineoff] &= ~(3 << byteoff);
7850 } else {
7851 line->u.main.descr[lineoff] |= (3 << byteoff);
7852 line->u.main.w32[lineoff] = d32;
7854 #endif
7857 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
7858 OCacheLine* line;
7859 UWord lineoff;
7861 if (UNLIKELY(a & 3)) {
7862 /* Handle misaligned case, slowly. */
7863 MC_(helperc_b_store2)( a + 0, d32 );
7864 MC_(helperc_b_store2)( a + 2, d32 );
7865 return;
7868 lineoff = oc_line_offset(a);
7869 if (OC_ENABLE_ASSERTIONS) {
7870 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7873 line = find_OCacheLine( a );
7875 if (d32 == 0) {
7876 line->u.main.descr[lineoff] = 0;
7877 } else {
7878 line->u.main.descr[lineoff] = 0xF;
7879 line->u.main.w32[lineoff] = d32;
7883 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
7884 STATIC_ASSERT(OC_W32S_PER_LINE == 8);
7885 OCacheLine* line;
7886 UWord lineoff;
7888 if (UNLIKELY(a & 7)) {
7889 /* Handle misaligned case, slowly. */
7890 MC_(helperc_b_store4)( a + 0, d32 );
7891 MC_(helperc_b_store4)( a + 4, d32 );
7892 return;
7895 lineoff = oc_line_offset(a);
7896 if (OC_ENABLE_ASSERTIONS) {
7897 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7900 line = find_OCacheLine( a );
7902 if (d32 == 0) {
7903 line->u.main.descr[lineoff + 0] = 0;
7904 line->u.main.descr[lineoff + 1] = 0;
7905 } else {
7906 line->u.main.descr[lineoff + 0] = 0xF;
7907 line->u.main.descr[lineoff + 1] = 0xF;
7908 line->u.main.w32[lineoff + 0] = d32;
7909 line->u.main.w32[lineoff + 1] = d32;
7913 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
7914 STATIC_ASSERT(OC_W32S_PER_LINE == 8);
7915 OCacheLine* line;
7916 UWord lineoff;
7918 if (UNLIKELY(a & 15)) {
7919 /* Handle misaligned case, slowly. */
7920 MC_(helperc_b_store8)( a + 0, d32 );
7921 MC_(helperc_b_store8)( a + 8, d32 );
7922 return;
7925 lineoff = oc_line_offset(a);
7926 if (OC_ENABLE_ASSERTIONS) {
7927 tl_assert(lineoff == (lineoff & 4)); /*0,4*//*since 16-aligned*/
7930 line = find_OCacheLine( a );
7932 if (d32 == 0) {
7933 line->u.main.descr[lineoff + 0] = 0;
7934 line->u.main.descr[lineoff + 1] = 0;
7935 line->u.main.descr[lineoff + 2] = 0;
7936 line->u.main.descr[lineoff + 3] = 0;
7937 } else {
7938 line->u.main.descr[lineoff + 0] = 0xF;
7939 line->u.main.descr[lineoff + 1] = 0xF;
7940 line->u.main.descr[lineoff + 2] = 0xF;
7941 line->u.main.descr[lineoff + 3] = 0xF;
7942 line->u.main.w32[lineoff + 0] = d32;
7943 line->u.main.w32[lineoff + 1] = d32;
7944 line->u.main.w32[lineoff + 2] = d32;
7945 line->u.main.w32[lineoff + 3] = d32;
7949 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
7950 STATIC_ASSERT(OC_W32S_PER_LINE == 8);
7951 OCacheLine* line;
7952 UWord lineoff;
7954 if (UNLIKELY(a & 31)) {
7955 /* Handle misaligned case, slowly. */
7956 MC_(helperc_b_store16)( a + 0, d32 );
7957 MC_(helperc_b_store16)( a + 16, d32 );
7958 return;
7961 lineoff = oc_line_offset(a);
7962 if (OC_ENABLE_ASSERTIONS) {
7963 tl_assert(lineoff == 0);
7966 line = find_OCacheLine( a );
7968 if (d32 == 0) {
7969 line->u.main.descr[0] = 0;
7970 line->u.main.descr[1] = 0;
7971 line->u.main.descr[2] = 0;
7972 line->u.main.descr[3] = 0;
7973 line->u.main.descr[4] = 0;
7974 line->u.main.descr[5] = 0;
7975 line->u.main.descr[6] = 0;
7976 line->u.main.descr[7] = 0;
7977 } else {
7978 line->u.main.descr[0] = 0xF;
7979 line->u.main.descr[1] = 0xF;
7980 line->u.main.descr[2] = 0xF;
7981 line->u.main.descr[3] = 0xF;
7982 line->u.main.descr[4] = 0xF;
7983 line->u.main.descr[5] = 0xF;
7984 line->u.main.descr[6] = 0xF;
7985 line->u.main.descr[7] = 0xF;
7986 line->u.main.w32[0] = d32;
7987 line->u.main.w32[1] = d32;
7988 line->u.main.w32[2] = d32;
7989 line->u.main.w32[3] = d32;
7990 line->u.main.w32[4] = d32;
7991 line->u.main.w32[5] = d32;
7992 line->u.main.w32[6] = d32;
7993 line->u.main.w32[7] = d32;
7998 /*--------------------------------------------*/
7999 /*--- Origin tracking: sarp handlers ---*/
8000 /*--------------------------------------------*/
8002 // We may get asked to do very large SARPs (bug 446103), hence it is important
8003 // to process 32-byte chunks at a time when possible.
8005 __attribute__((noinline))
8006 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
8007 if ((a & 1) && len >= 1) {
8008 MC_(helperc_b_store1)( a, otag );
8009 a++;
8010 len--;
8012 if ((a & 2) && len >= 2) {
8013 MC_(helperc_b_store2)( a, otag );
8014 a += 2;
8015 len -= 2;
8017 if ((a & 4) && len >= 4) {
8018 MC_(helperc_b_store4)( a, otag );
8019 a += 4;
8020 len -= 4;
8022 if ((a & 8) && len >= 8) {
8023 MC_(helperc_b_store8)( a, otag );
8024 a += 8;
8025 len -= 8;
8027 if ((a & 16) && len >= 16) {
8028 MC_(helperc_b_store16)( a, otag );
8029 a += 16;
8030 len -= 16;
8032 if (len >= 32) {
8033 tl_assert(0 == (a & 31));
8034 while (len >= 32) {
8035 MC_(helperc_b_store32)( a, otag );
8036 a += 32;
8037 len -= 32;
8040 if (len >= 16) {
8041 MC_(helperc_b_store16)( a, otag );
8042 a += 16;
8043 len -= 16;
8045 if (len >= 8) {
8046 MC_(helperc_b_store8)( a, otag );
8047 a += 8;
8048 len -= 8;
8050 if (len >= 4) {
8051 MC_(helperc_b_store4)( a, otag );
8052 a += 4;
8053 len -= 4;
8055 if (len >= 2) {
8056 MC_(helperc_b_store2)( a, otag );
8057 a += 2;
8058 len -= 2;
8060 if (len >= 1) {
8061 MC_(helperc_b_store1)( a, otag );
8062 //a++;
8063 len--;
8065 tl_assert(len == 0);
8068 __attribute__((noinline))
8069 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
8070 if ((a & 1) && len >= 1) {
8071 MC_(helperc_b_store1)( a, 0 );
8072 a++;
8073 len--;
8075 if ((a & 2) && len >= 2) {
8076 MC_(helperc_b_store2)( a, 0 );
8077 a += 2;
8078 len -= 2;
8080 if ((a & 4) && len >= 4) {
8081 MC_(helperc_b_store4)( a, 0 );
8082 a += 4;
8083 len -= 4;
8085 if ((a & 8) && len >= 8) {
8086 MC_(helperc_b_store8)( a, 0 );
8087 a += 8;
8088 len -= 8;
8090 if ((a & 16) && len >= 16) {
8091 MC_(helperc_b_store16)( a, 0 );
8092 a += 16;
8093 len -= 16;
8095 if (len >= 32) {
8096 tl_assert(0 == (a & 31));
8097 while (len >= 32) {
8098 MC_(helperc_b_store32)( a, 0 );
8099 a += 32;
8100 len -= 32;
8103 if (len >= 16) {
8104 MC_(helperc_b_store16)( a, 0 );
8105 a += 16;
8106 len -= 16;
8108 if (len >= 8) {
8109 MC_(helperc_b_store8)( a, 0 );
8110 a += 8;
8111 len -= 8;
8113 if (len >= 4) {
8114 MC_(helperc_b_store4)( a, 0 );
8115 a += 4;
8116 len -= 4;
8118 if (len >= 2) {
8119 MC_(helperc_b_store2)( a, 0 );
8120 a += 2;
8121 len -= 2;
8123 if (len >= 1) {
8124 MC_(helperc_b_store1)( a, 0 );
8125 //a++;
8126 len--;
8128 tl_assert(len == 0);
8132 /*------------------------------------------------------------*/
8133 /*--- Setup and finalisation ---*/
8134 /*------------------------------------------------------------*/
8136 static void mc_post_clo_init ( void )
8138 /* If we've been asked to emit XML, mash around various other
8139 options so as to constrain the output somewhat. */
8140 if (VG_(clo_xml)) {
8141 /* Extract as much info as possible from the leak checker. */
8142 MC_(clo_leak_check) = LC_Full;
8145 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)
8146 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8147 VG_(message)(Vg_UserMsg,
8148 "Warning: --freelist-big-blocks value %lld has no effect\n"
8149 "as it is >= to --freelist-vol value %lld\n",
8150 MC_(clo_freelist_big_blocks),
8151 MC_(clo_freelist_vol));
8154 if (MC_(clo_workaround_gcc296_bugs)
8155 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8156 VG_(umsg)(
8157 "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
8158 "Warning: Instead use: --ignore-range-below-sp=1024-1\n"
8159 "\n"
8163 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
8165 if (MC_(clo_mc_level) == 3) {
8166 /* We're doing origin tracking. */
8167 # ifdef PERF_FAST_STACK
8168 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU );
8169 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU );
8170 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU );
8171 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU );
8172 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU );
8173 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
8174 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
8175 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
8176 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
8177 # endif
8178 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU );
8179 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_make_ECU );
8180 } else {
8181 /* Not doing origin tracking */
8182 # ifdef PERF_FAST_STACK
8183 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
8184 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 );
8185 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 );
8186 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 );
8187 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 );
8188 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
8189 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
8190 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
8191 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
8192 # endif
8193 VG_(track_new_mem_stack) ( mc_new_mem_stack );
8194 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
8197 // We assume that brk()/sbrk() does not initialise new memory. Is this
8198 // accurate? John Reiser says:
8200 // 0) sbrk() can *decrease* process address space. No zero fill is done
8201 // for a decrease, not even the fragment on the high end of the last page
8202 // that is beyond the new highest address. For maximum safety and
8203 // portability, then the bytes in the last page that reside above [the
8204 // new] sbrk(0) should be considered to be uninitialized, but in practice
8205 // it is exceedingly likely that they will retain their previous
8206 // contents.
8208 // 1) If an increase is large enough to require new whole pages, then
8209 // those new whole pages (like all new pages) are zero-filled by the
8210 // operating system. So if sbrk(0) already is page aligned, then
8211 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
8213 // 2) Any increase that lies within an existing allocated page is not
8214 // changed. So if (x = sbrk(0)) is not page aligned, then
8215 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
8216 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
8217 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
8218 // of them come along for the ride because the operating system deals
8219 // only in whole pages. Again, for maximum safety and portability, then
8220 // anything that lives above [the new] sbrk(0) should be considered
8221 // uninitialized, but in practice will retain previous contents [zero in
8222 // this case.]"
8224 // In short:
8226 // A key property of sbrk/brk is that new whole pages that are supplied
8227 // by the operating system *do* get initialized to zero.
8229 // As for the portability of all this:
8231 // sbrk and brk are not POSIX. However, any system that is a derivative
8232 // of *nix has sbrk and brk because there are too many software (such as
8233 // the Bourne shell) which rely on the traditional memory map (.text,
8234 // .data+.bss, stack) and the existence of sbrk/brk.
8236 // So we should arguably observe all this. However:
8237 // - The current inaccuracy has caused maybe one complaint in seven years(?)
8238 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
8239 // doubt most programmers know the above information.
8240 // So I'm not terribly unhappy with marking it as undefined. --njn.
8242 // [More: I think most of what John said only applies to sbrk(). It seems
8243 // that brk() always deals in whole pages. And since this event deals
8244 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
8245 // just mark all memory it allocates as defined.]
8247 # if !defined(VGO_solaris)
8248 if (MC_(clo_mc_level) == 3)
8249 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_make_ECU );
8250 else
8251 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_no_ECU );
8252 # else
8253 // On Solaris, brk memory has to be marked as defined, otherwise we get
8254 // many false positives.
8255 VG_(track_new_mem_brk) ( make_mem_defined_w_tid );
8256 # endif
8258 /* This origin tracking cache is huge (~100M), so only initialise
8259 if we need it. */
8260 if (MC_(clo_mc_level) >= 3) {
8261 init_OCache();
8262 tl_assert(ocacheL1 != NULL);
8263 for (UInt i = 0; i < 4096; i++ ) {
8264 tl_assert(ocachesL2[i] != NULL);
8266 } else {
8267 tl_assert(ocacheL1 == NULL);
8268 for (UInt i = 0; i < 4096; i++ ) {
8269 tl_assert(ocachesL2[i] == NULL);
8273 MC_(chunk_poolalloc) = VG_(newPA)
8274 (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
8275 1000,
8276 VG_(malloc),
8277 "mc.cMC.1 (MC_Chunk pools)",
8278 VG_(free));
8280 /* Do not check definedness of guest state if --undef-value-errors=no */
8281 if (MC_(clo_mc_level) >= 2)
8282 VG_(track_pre_reg_read) ( mc_pre_reg_read );
8284 if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
8285 if (MC_(clo_keep_stacktraces) == KS_none
8286 || MC_(clo_keep_stacktraces) == KS_free)
8287 VG_(fmsg_bad_option)("--keep-stacktraces",
8288 "To use --xtree-memory=full, you must"
8289 " keep at least the alloc stacktrace\n");
8290 // Activate full xtree memory profiling.
8291 VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main));
8296 static void print_SM_info(const HChar* type, Int n_SMs)
8298 VG_(message)(Vg_DebugMsg,
8299 " memcheck: SMs: %s = %d (%luk, %luM)\n",
8300 type,
8301 n_SMs,
8302 n_SMs * sizeof(SecMap) / 1024UL,
8303 n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
8306 static void mc_print_stats (void)
8308 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
8310 VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
8311 VG_(free_queue_volume), VG_(free_queue_length));
8312 VG_(message)(Vg_DebugMsg,
8313 " memcheck: sanity checks: %d cheap, %d expensive\n",
8314 n_sanity_cheap, n_sanity_expensive );
8315 VG_(message)(Vg_DebugMsg,
8316 " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
8317 n_auxmap_L2_nodes,
8318 n_auxmap_L2_nodes * 64,
8319 n_auxmap_L2_nodes / 16 );
8320 VG_(message)(Vg_DebugMsg,
8321 " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
8322 n_auxmap_L1_searches, n_auxmap_L1_cmps,
8323 (10ULL * n_auxmap_L1_cmps)
8324 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
8326 VG_(message)(Vg_DebugMsg,
8327 " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
8328 n_auxmap_L2_searches, n_auxmap_L2_nodes
8331 print_SM_info("n_issued ", n_issued_SMs);
8332 print_SM_info("n_deissued ", n_deissued_SMs);
8333 print_SM_info("max_noaccess ", max_noaccess_SMs);
8334 print_SM_info("max_undefined", max_undefined_SMs);
8335 print_SM_info("max_defined ", max_defined_SMs);
8336 print_SM_info("max_non_DSM ", max_non_DSM_SMs);
8338 // Three DSMs, plus the non-DSM ones
8339 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
8340 // The 3*sizeof(Word) bytes is the AVL node metadata size.
8341 // The VG_ROUNDUP is because the OSet pool allocator will/must align
8342 // the elements on pointer size.
8343 // Note that the pool allocator has some additional small overhead
8344 // which is not counted in the below.
8345 // Hardwiring this logic sucks, but I don't see how else to do it.
8346 max_secVBit_szB = max_secVBit_nodes *
8347 (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
8348 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
8350 VG_(message)(Vg_DebugMsg,
8351 " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
8352 max_secVBit_nodes, max_secVBit_szB / 1024,
8353 max_secVBit_szB / (1024 * 1024));
8354 VG_(message)(Vg_DebugMsg,
8355 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
8356 sec_vbits_new_nodes + sec_vbits_updates,
8357 sec_vbits_new_nodes, sec_vbits_updates );
8358 VG_(message)(Vg_DebugMsg,
8359 " memcheck: max shadow mem size: %luk, %luM\n",
8360 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
8362 if (MC_(clo_mc_level) >= 3) {
8363 VG_(message)(Vg_DebugMsg,
8364 " ocacheL1: %'14lu refs %'14lu misses (%'lu lossage)\n",
8365 stats_ocacheL1_find,
8366 stats_ocacheL1_misses,
8367 stats_ocacheL1_lossage );
8368 VG_(message)(Vg_DebugMsg,
8369 " ocacheL1: %'14lu at 0 %'14lu at 1\n",
8370 stats_ocacheL1_find - stats_ocacheL1_misses
8371 - stats_ocacheL1_found_at_1
8372 - stats_ocacheL1_found_at_N,
8373 stats_ocacheL1_found_at_1 );
8374 VG_(message)(Vg_DebugMsg,
8375 " ocacheL1: %'14lu at 2+ %'14lu move-fwds\n",
8376 stats_ocacheL1_found_at_N,
8377 stats_ocacheL1_movefwds );
8378 VG_(message)(Vg_DebugMsg,
8379 " ocacheL1: %'14lu sizeB %'14d useful\n",
8380 (SizeT)sizeof(OCache),
8381 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
8382 VG_(message)(Vg_DebugMsg,
8383 " ocacheL2: %'14lu finds %'14lu misses\n",
8384 stats__ocacheL2_finds,
8385 stats__ocacheL2_misses );
8386 VG_(message)(Vg_DebugMsg,
8387 " ocacheL2: %'14lu adds %'14lu dels\n",
8388 stats__ocacheL2_adds,
8389 stats__ocacheL2_dels );
8390 VG_(message)(Vg_DebugMsg,
8391 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
8392 stats__ocacheL2_n_nodes_max,
8393 stats__ocacheL2_n_nodes );
8394 VG_(message)(Vg_DebugMsg,
8395 " niacache: %'12lu refs %'12lu misses\n",
8396 stats__nia_cache_queries, stats__nia_cache_misses);
8397 } else {
8398 tl_assert(ocacheL1 == NULL);
8399 for (UInt i = 0; i < 4096; i++ ) {
8400 tl_assert(ocachesL2[1] == NULL);
8406 static void mc_fini ( Int exitcode )
8408 MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True);
8409 MC_(print_malloc_stats)();
8411 if (MC_(clo_leak_check) != LC_Off) {
8412 LeakCheckParams lcp;
8413 HChar* xt_filename = NULL;
8414 lcp.mode = MC_(clo_leak_check);
8415 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
8416 lcp.heuristics = MC_(clo_leak_check_heuristics);
8417 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
8418 lcp.deltamode = LCD_Any;
8419 lcp.max_loss_records_output = 999999999;
8420 lcp.requested_by_monitor_command = False;
8421 if (MC_(clo_xtree_leak)) {
8422 xt_filename = VG_(expand_file_name)("--xtree-leak-file",
8423 MC_(clo_xtree_leak_file));
8424 lcp.xt_filename = xt_filename;
8425 lcp.mode = LC_Full;
8426 lcp.show_leak_kinds = MC_(all_Reachedness)();
8428 else
8429 lcp.xt_filename = NULL;
8430 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
8431 if (MC_(clo_xtree_leak))
8432 VG_(free)(xt_filename);
8433 } else {
8434 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8435 VG_(umsg)(
8436 "For a detailed leak analysis, rerun with: --leak-check=full\n"
8437 "\n"
8442 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
8443 && MC_(clo_mc_level) == 2) {
8444 VG_(message)(Vg_UserMsg,
8445 "Use --track-origins=yes to see where "
8446 "uninitialised values come from\n");
8449 /* Print a warning if any client-request generated ignore-ranges
8450 still exist. It would be reasonable to expect that a properly
8451 written program would remove any such ranges before exiting, and
8452 since they are a bit on the dangerous side, let's comment. By
8453 contrast ranges which are specified on the command line normally
8454 pertain to hardware mapped into the address space, and so we
8455 can't expect the client to have got rid of them. */
8456 if (gIgnoredAddressRanges) {
8457 UInt i, nBad = 0;
8458 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
8459 UWord val = IAR_INVALID;
8460 UWord key_min = ~(UWord)0;
8461 UWord key_max = (UWord)0;
8462 VG_(indexRangeMap)( &key_min, &key_max, &val,
8463 gIgnoredAddressRanges, i );
8464 if (val != IAR_ClientReq)
8465 continue;
8466 /* Print the offending range. Also, if it is the first,
8467 print a banner before it. */
8468 nBad++;
8469 if (nBad == 1) {
8470 VG_(umsg)(
8471 "WARNING: exiting program has the following client-requested\n"
8472 "WARNING: address error disablement range(s) still in force,\n"
8473 "WARNING: "
8474 "possibly as a result of some mistake in the use of the\n"
8475 "WARNING: "
8476 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
8479 VG_(umsg)(" [%u] 0x%016lx-0x%016lx %s\n",
8480 i, key_min, key_max, showIARKind(val));
8484 done_prof_mem();
8486 if (VG_(clo_stats))
8487 mc_print_stats();
8489 if (0) {
8490 VG_(message)(Vg_DebugMsg,
8491 "------ Valgrind's client block stats follow ---------------\n" );
8492 show_client_block_stats();
8496 /* mark the given addr/len unaddressable for watchpoint implementation
8497 The PointKind will be handled at access time */
8498 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
8499 Addr addr, SizeT len)
8501 /* GDBTD this is somewhat fishy. We might rather have to save the previous
8502 accessibility and definedness in gdbserver so as to allow restoring it
8503 properly. Currently, we assume that the user only watches things
8504 which are properly addressable and defined */
8505 if (insert)
8506 MC_(make_mem_noaccess) (addr, len);
8507 else
8508 MC_(make_mem_defined) (addr, len);
8509 return True;
8512 static void mc_pre_clo_init(void)
8514 VG_(details_name) ("Memcheck");
8515 VG_(details_version) (NULL);
8516 VG_(details_description) ("a memory error detector");
8517 VG_(details_copyright_author)(
8518 "Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.");
8519 VG_(details_bug_reports_to) (VG_BUGS_TO);
8520 VG_(details_avg_translation_sizeB) ( 640 );
8522 VG_(basic_tool_funcs) (mc_post_clo_init,
8523 MC_(instrument),
8524 mc_fini);
8526 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) );
8529 VG_(needs_core_errors) ();
8530 VG_(needs_tool_errors) (MC_(eq_Error),
8531 MC_(before_pp_Error),
8532 MC_(pp_Error),
8533 True,/*show TIDs for errors*/
8534 MC_(update_Error_extra),
8535 MC_(is_recognised_suppression),
8536 MC_(read_extra_suppression_info),
8537 MC_(error_matches_suppression),
8538 MC_(get_error_name),
8539 MC_(get_extra_suppression_info),
8540 MC_(print_extra_suppression_use),
8541 MC_(update_extra_suppression_use));
8542 VG_(needs_libc_freeres) ();
8543 VG_(needs_cxx_freeres) ();
8544 VG_(needs_command_line_options)(mc_process_cmd_line_options,
8545 mc_print_usage,
8546 mc_print_debug_usage);
8547 VG_(needs_client_requests) (mc_handle_client_request);
8548 VG_(needs_sanity_checks) (mc_cheap_sanity_check,
8549 mc_expensive_sanity_check);
8550 VG_(needs_print_stats) (mc_print_stats);
8551 VG_(needs_info_location) (MC_(pp_describe_addr));
8552 VG_(needs_malloc_replacement) (MC_(malloc),
8553 MC_(__builtin_new),
8554 MC_(__builtin_new_aligned),
8555 MC_(__builtin_vec_new),
8556 MC_(__builtin_vec_new_aligned),
8557 MC_(memalign),
8558 MC_(calloc),
8559 MC_(free),
8560 MC_(__builtin_delete),
8561 MC_(__builtin_delete_aligned),
8562 MC_(__builtin_vec_delete),
8563 MC_(__builtin_vec_delete_aligned),
8564 MC_(realloc),
8565 MC_(malloc_usable_size),
8566 MC_MALLOC_DEFAULT_REDZONE_SZB );
8567 MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
8569 VG_(needs_xml_output) ();
8571 VG_(track_new_mem_startup) ( mc_new_mem_startup );
8573 // Handling of mmap and mprotect isn't simple (well, it is simple,
8574 // but the justification isn't.) See comments above, just prior to
8575 // mc_new_mem_mmap.
8576 VG_(track_new_mem_mmap) ( mc_new_mem_mmap );
8577 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
8579 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) );
8581 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
8582 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) );
8583 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) );
8585 /* Defer the specification of the new_mem_stack functions to the
8586 post_clo_init function, since we need to first parse the command
8587 line before deciding which set to use. */
8589 # ifdef PERF_FAST_STACK
8590 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
8591 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 );
8592 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 );
8593 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 );
8594 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 );
8595 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 );
8596 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 );
8597 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 );
8598 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 );
8599 # endif
8600 VG_(track_die_mem_stack) ( mc_die_mem_stack );
8602 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) );
8604 VG_(track_pre_mem_read) ( check_mem_is_defined );
8605 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
8606 VG_(track_pre_mem_write) ( check_mem_is_addressable );
8607 VG_(track_post_mem_write) ( mc_post_mem_write );
8609 VG_(track_post_reg_write) ( mc_post_reg_write );
8610 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
8612 if (MC_(clo_mc_level) >= 2) {
8613 VG_(track_copy_mem_to_reg) ( mc_copy_mem_to_reg );
8614 VG_(track_copy_reg_to_mem) ( mc_copy_reg_to_mem );
8617 VG_(needs_watchpoint) ( mc_mark_unaddressable_for_watchpoint );
8619 init_shadow_memory();
8620 // MC_(chunk_poolalloc) must be allocated in post_clo_init
8621 tl_assert(MC_(chunk_poolalloc) == NULL);
8622 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" );
8623 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
8624 init_prof_mem();
8626 tl_assert( mc_expensive_sanity_check() );
8628 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
8629 tl_assert(sizeof(UWord) == sizeof(Addr));
8630 // Call me paranoid. I don't care.
8631 tl_assert(sizeof(void*) == sizeof(Addr));
8633 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
8634 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
8636 /* This is small. Always initialise it. */
8637 init_nia_to_ecu_cache();
8639 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
8640 if we need to, since the command line args haven't been
8641 processed yet. Hence defer it to mc_post_clo_init. */
8642 tl_assert(ocacheL1 == NULL);
8643 for (UInt i = 0; i < 4096; i++ ) {
8644 tl_assert(ocachesL2[i] == NULL);
8647 /* Check some important stuff. See extensive comments above
8648 re UNALIGNED_OR_HIGH for background. */
8649 # if VG_WORDSIZE == 4
8650 tl_assert(sizeof(void*) == 4);
8651 tl_assert(sizeof(Addr) == 4);
8652 tl_assert(sizeof(UWord) == 4);
8653 tl_assert(sizeof(Word) == 4);
8654 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
8655 tl_assert(MASK(1) == 0UL);
8656 tl_assert(MASK(2) == 1UL);
8657 tl_assert(MASK(4) == 3UL);
8658 tl_assert(MASK(8) == 7UL);
8659 # else
8660 tl_assert(VG_WORDSIZE == 8);
8661 tl_assert(sizeof(void*) == 8);
8662 tl_assert(sizeof(Addr) == 8);
8663 tl_assert(sizeof(UWord) == 8);
8664 tl_assert(sizeof(Word) == 8);
8665 tl_assert(MAX_PRIMARY_ADDRESS == 0x1FFFFFFFFFULL);
8666 tl_assert(MASK(1) == 0xFFFFFFE000000000ULL);
8667 tl_assert(MASK(2) == 0xFFFFFFE000000001ULL);
8668 tl_assert(MASK(4) == 0xFFFFFFE000000003ULL);
8669 tl_assert(MASK(8) == 0xFFFFFFE000000007ULL);
8670 # endif
8672 /* Check some assertions to do with the instrumentation machinery. */
8673 MC_(do_instrumentation_startup_checks)();
8676 STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));
8678 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
8680 /*--------------------------------------------------------------------*/
8681 /*--- end mc_main.c ---*/
8682 /*--------------------------------------------------------------------*/