syswrap openat2 for all linux arches
[valgrind.git] / memcheck / mc_main.c
blobbe237eeb422039a3cd04a02ab52aa4b4736fb33d
1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
5 /*--- accessibility (A) and validity (V) status of each byte. ---*/
6 /*--- mc_main.c ---*/
7 /*--------------------------------------------------------------------*/
9 /*
10 This file is part of MemCheck, a heavyweight Valgrind tool for
11 detecting memory errors.
13 Copyright (C) 2000-2017 Julian Seward
14 jseward@acm.org
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, see <http://www.gnu.org/licenses/>.
29 The GNU General Public License is contained in the file COPYING.
32 #include "pub_tool_basics.h"
33 #include "pub_tool_aspacemgr.h"
34 #include "pub_tool_gdbserver.h"
35 #include "pub_tool_poolalloc.h"
36 #include "pub_tool_hashtable.h" // For mc_include.h
37 #include "pub_tool_libcbase.h"
38 #include "pub_tool_libcassert.h"
39 #include "pub_tool_libcprint.h"
40 #include "pub_tool_machine.h"
41 #include "pub_tool_mallocfree.h"
42 #include "pub_tool_options.h"
43 #include "pub_tool_oset.h"
44 #include "pub_tool_rangemap.h"
45 #include "pub_tool_replacemalloc.h"
46 #include "pub_tool_tooliface.h"
47 #include "pub_tool_threadstate.h"
48 #include "pub_tool_xarray.h"
49 #include "pub_tool_xtree.h"
50 #include "pub_tool_xtmemory.h"
52 #include "mc_include.h"
53 #include "memcheck.h" /* for client requests */
/* Extra sanity checking is compiled in when this is set to 1. */
#define VG_DEBUG_MEMORY 0

/* Debug tracing hook.  It currently expands to nothing; the printf call
   is left in the trailing comment for easy re-enabling. */
#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)

/* Forward declarations. */
static void ocache_sarp_Set_Origins ( Addr, UWord, UInt );
static void ocache_sarp_Clear_Origins ( Addr, UWord );
64 /*------------------------------------------------------------*/
65 /*--- Fast-case knobs ---*/
66 /*------------------------------------------------------------*/
// To disable a fast case, comment its #define out entirely; setting it
// to zero is not the way to turn it off.

/* PERF_FAST_LOADV is in mc_include.h */
#define PERF_FAST_STOREV 1

#define PERF_FAST_SARP 1

#define PERF_FAST_STACK 1
#define PERF_FAST_STACK2 1

/* Set to 1 to enable assertions on the origin tracking cache fast
   paths. */
#define OC_ENABLE_ASSERTIONS 0

/* Set to 1 for experimental, higher precision origin tracking of
   8- and 16-bit stores. */
#define OC_PRECISION_STORE 1
87 /*------------------------------------------------------------*/
88 /*--- Comments on the origin tracking implementation ---*/
89 /*------------------------------------------------------------*/
91 /* See detailed comment entitled
92 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
93 which is contained further on in this file. */
96 /*------------------------------------------------------------*/
97 /*--- V bits and A bits ---*/
98 /*------------------------------------------------------------*/
100 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
101 thinks the corresponding value bit is defined. And every memory byte
102 has an A bit, which tracks whether Memcheck thinks the program can access
103 it safely (ie. it's mapped, and has at least one of the RWX permission bits
104 set). So every N-bit register is shadowed with N V bits, and every memory
105 byte is shadowed with 8 V bits and one A bit.
107 In the implementation, we use two forms of compression (compressed V bits
108 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
109 for memory.
111 Memcheck also tracks extra information about each heap block that is
112 allocated, for detecting memory leaks and other purposes.
115 /*------------------------------------------------------------*/
116 /*--- Basic A/V bitmap representation. ---*/
117 /*------------------------------------------------------------*/
119 /* All reads and writes are checked against a memory map (a.k.a. shadow
120 memory), which records the state of all memory in the process.
122 On 32-bit machines the memory map is organised as follows.
123 The top 16 bits of an address are used to index into a top-level
124 map table, containing 65536 entries. Each entry is a pointer to a
125 second-level map, which records the accessibility and validity
126 permissions for the 65536 bytes indexed by the lower 16 bits of the
127 address. Each byte is represented by two bits (details are below). So
128 each second-level map contains 16384 bytes. This two-level arrangement
129 conveniently divides the 4G address space into 64k lumps, each size 64k
130 bytes.
132 All entries in the primary (top-level) map must point to a valid
133 secondary (second-level) map. Since many of the 64kB chunks will
134 have the same status for every bit -- ie. noaccess (for unused
135 address space) or entirely addressable and defined (for code segments) --
136 there are three distinguished secondary maps, which indicate 'noaccess',
137 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
138 map entry points to the relevant distinguished map. In practice,
139 typically more than half of the addressable memory is represented with
140 the 'undefined' or 'defined' distinguished secondary map, so it gives a
141 good saving. It also lets us set the V+A bits of large address regions
142 quickly in set_address_range_perms().
144 On 64-bit machines it's more complicated. If we followed the same basic
145 scheme we'd have a four-level table which would require too many memory
146 accesses. So instead the top-level map table has 2^21 entries (indexed
147 using bits 16..36 of the address); this covers the bottom 128GB. Any
148 accesses above 128GB are handled with a slow, sparse auxiliary table.
149 Valgrind's address space manager tries very hard to keep things below
150 this 128GB barrier so that performance doesn't suffer too much.
152 Note that this file has a lot of different functions for reading and
153 writing shadow memory. Only a couple are strictly necessary (eg.
154 get_vabits2 and set_vabits2), most are just specialised for specific
155 common cases to improve performance.
157 Aside: the V+A bits are less precise than they could be -- we have no way
158 of marking memory as read-only. It would be great if we could add an
159 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
160 which requires 2.3 bits to hold, and there's no way to do that elegantly
161 -- we'd have to double up to 4 bits of metadata per byte, which doesn't
162 seem worth it.
165 /* --------------- Basic configuration --------------- */
/* N_PRIMARY_BITS is the only knob to change here.  N_PRIMARY_MAP
   *must* be a power of 2. */

#if VG_WORDSIZE == 4

/* cover the entire address space */
#  define N_PRIMARY_BITS  16

#else

/* Just handle the first 128G fast and the rest via auxiliary
   primaries.  If you change this, Memcheck will assert at startup.
   See the definition of UNALIGNED_OR_HIGH for extensive comments. */
#  define N_PRIMARY_BITS  21

#endif


/* Number of entries in the main primary map.  Do not change this. */
#define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)

/* Highest address covered by the main primary map (each entry covers
   65536 bytes).  Do not change this. */
#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
191 /* --------------- Secondary maps --------------- */
193 // Each byte of memory conceptually has an A bit, which indicates its
194 // addressability, and 8 V bits, which indicate its definedness.
196 // But because very few bytes are partially defined, we can use a nice
197 // compression scheme to reduce the size of shadow memory. Each byte of
198 // memory has 2 bits which indicate its state (ie. V+A bits):
200 // 00: noaccess (unaddressable but treated as fully defined)
201 // 01: undefined (addressable and fully undefined)
202 // 10: defined (addressable and fully defined)
203 // 11: partdefined (addressable and partially defined)
205 // In the "partdefined" case, we use a secondary table to store the V bits.
206 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
207 // bits.
209 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
210 // four bytes (32 bits) of memory are in each chunk. Hence the name
211 // "vabits8". This lets us get the V+A bits for four bytes at a time
212 // easily (without having to do any shifting and/or masking), and that is a
213 // very common operation. (Note that although each vabits8 chunk
214 // is 8 bits in size, it represents 32 bits of memory.)
216 // The representation is "inverse" little-endian... each 4 bytes of
217 // memory is represented by a 1 byte value, where:
219 // - the status of byte (a+0) is held in bits [1..0]
220 // - the status of byte (a+1) is held in bits [3..2]
221 // - the status of byte (a+2) is held in bits [5..4]
222 // - the status of byte (a+3) is held in bits [7..6]
224 // It's "inverse" because endianness normally describes a mapping from
225 // value bits to memory addresses; in this case the mapping is inverted.
226 // Ie. instead of particular value bits being held in certain addresses, in
227 // this case certain addresses are represented by particular value bits.
228 // See insert_vabits2_into_vabits8() for an example.
230 // But note that we don't compress the V bits stored in registers; they
231 // need to be explicit to make the shadow operations possible. Therefore
232 // when moving values between registers and memory we need to convert
233 // between the expanded in-register format and the compressed in-memory
234 // format. This isn't so difficult, it just requires careful attention in a
235 // few places.
// 2-bit state for one byte (eight bits) of memory.
#define VA_BITS2_NOACCESS     0x0      // 00b
#define VA_BITS2_UNDEFINED    0x1      // 01b
#define VA_BITS2_DEFINED      0x2      // 10b
#define VA_BITS2_PARTDEFINED  0x3      // 11b

// The same state replicated for two bytes (16 bits) of memory.
#define VA_BITS4_NOACCESS     0x0      // 00_00b
#define VA_BITS4_UNDEFINED    0x5      // 01_01b
#define VA_BITS4_DEFINED      0xa      // 10_10b

// ... for four bytes (32 bits) of memory.
#define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
#define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
#define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b

// ... for eight bytes (64 bits) of memory.
#define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
#define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
#define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2

// ... for sixteen bytes (128 bits) of memory.
#define VA_BITS32_UNDEFINED   0x55555555  // 01_01_01_01b x 4


#define SM_CHUNKS             16384    // Each SM covers 64k of memory.
// Index of the vabits8 chunk (4 bytes of memory each) within a SecMap.
#define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
// Index of the vabits16 chunk (8 bytes of memory each) within a SecMap.
#define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
// Performance depends critically on the requested inlining actually
// happening, so ask for it as forcefully as possible.
#define INLINE    inline __attribute__((always_inline))
270 static INLINE Addr start_of_this_sm ( Addr a ) {
271 return (a & (~SM_MASK));
273 static INLINE Bool is_start_of_sm ( Addr a ) {
274 return (start_of_this_sm(a) == a);
277 STATIC_ASSERT(SM_CHUNKS % 2 == 0);
279 typedef
280 union {
281 UChar vabits8[SM_CHUNKS];
282 UShort vabits16[SM_CHUNKS/2];
284 SecMap;
286 // 3 distinguished secondary maps, one for no-access, one for
287 // accessible but undefined, and one for accessible and defined.
288 // Distinguished secondaries may never be modified.
289 #define SM_DIST_NOACCESS 0
290 #define SM_DIST_UNDEFINED 1
291 #define SM_DIST_DEFINED 2
293 static SecMap sm_distinguished[3];
295 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
296 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
299 // Forward declaration
300 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
302 /* dist_sm points to one of our three distinguished secondaries. Make
303 a copy of it so that we can write to it.
305 static SecMap* copy_for_writing ( SecMap* dist_sm )
307 SecMap* new_sm;
308 tl_assert(dist_sm == &sm_distinguished[0]
309 || dist_sm == &sm_distinguished[1]
310 || dist_sm == &sm_distinguished[2]);
312 SysRes sres = VG_(am_shadow_alloc)(sizeof(SecMap));
313 if (sr_isError(sres))
314 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
315 sizeof(SecMap), sr_Err(sres) );
316 new_sm = (void *)(Addr)sr_Res(sres);
317 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
318 update_SM_counts(dist_sm, new_sm);
319 return new_sm;
322 /* --------------- Stats --------------- */
324 static Int n_issued_SMs = 0;
325 static Int n_deissued_SMs = 0;
326 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs
327 static Int n_undefined_SMs = 0;
328 static Int n_defined_SMs = 0;
329 static Int n_non_DSM_SMs = 0;
330 static Int max_noaccess_SMs = 0;
331 static Int max_undefined_SMs = 0;
332 static Int max_defined_SMs = 0;
333 static Int max_non_DSM_SMs = 0;
335 /* # searches initiated in auxmap_L1, and # base cmps required */
336 static ULong n_auxmap_L1_searches = 0;
337 static ULong n_auxmap_L1_cmps = 0;
338 /* # of searches that missed in auxmap_L1 and therefore had to
339 be handed to auxmap_L2. And the number of nodes inserted. */
340 static ULong n_auxmap_L2_searches = 0;
341 static ULong n_auxmap_L2_nodes = 0;
343 static Int n_sanity_cheap = 0;
344 static Int n_sanity_expensive = 0;
346 static Int n_secVBit_nodes = 0;
347 static Int max_secVBit_nodes = 0;
349 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
351 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
352 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
353 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --;
354 else { n_non_DSM_SMs --;
355 n_deissued_SMs ++; }
357 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
358 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
359 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++;
360 else { n_non_DSM_SMs ++;
361 n_issued_SMs ++; }
363 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs;
364 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
365 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs;
366 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs;
369 /* --------------- Primary maps --------------- */
371 /* The main primary map. This covers some initial part of the address
372 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
373 handled using the auxiliary primary map.
375 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
376 && (defined(VGP_arm_linux) \
377 || defined(VGP_x86_linux) || defined(VGP_x86_solaris) || defined(VGP_x86_freebsd))
378 /* mc_main_asm.c needs visibility on a few things declared in this file.
379 MC_MAIN_STATIC allows to define them static if ok, i.e. on
380 platforms that are not using hand-coded asm statements. */
381 #define MC_MAIN_STATIC
382 #else
383 #define MC_MAIN_STATIC static
384 #endif
385 MC_MAIN_STATIC SecMap* primary_map[N_PRIMARY_MAP];
388 /* An entry in the auxiliary primary map. base must be a 64k-aligned
389 value, and sm points at the relevant secondary map. As with the
390 main primary map, the secondary may be either a real secondary, or
391 one of the three distinguished secondaries. DO NOT CHANGE THIS
392 LAYOUT: the first word has to be the key for OSet fast lookups.
394 typedef
395 struct {
396 Addr base;
397 SecMap* sm;
399 AuxMapEnt;
401 /* Tunable parameter: How big is the L1 queue? */
402 #define N_AUXMAP_L1 24
404 /* Tunable parameter: How far along the L1 queue to insert
405 entries resulting from L2 lookups? */
406 #define AUXMAP_L1_INSERT_IX 12
408 static struct {
409 Addr base;
410 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
412 auxmap_L1[N_AUXMAP_L1];
414 static OSet* auxmap_L2 = NULL;
416 static void init_auxmap_L1_L2 ( void )
418 Int i;
419 for (i = 0; i < N_AUXMAP_L1; i++) {
420 auxmap_L1[i].base = 0;
421 auxmap_L1[i].ent = NULL;
424 tl_assert(0 == offsetof(AuxMapEnt,base));
425 tl_assert(sizeof(Addr) == sizeof(void*));
426 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
427 /*fastCmp*/ NULL,
428 VG_(malloc), "mc.iaLL.1", VG_(free) );
431 /* Check representation invariants; if OK return NULL; else a
432 descriptive bit of text. Also return the number of
433 non-distinguished secondary maps referred to from the auxiliary
434 primary maps. */
436 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
438 Word i, j;
439 /* On a 32-bit platform, the L2 and L1 tables should
440 both remain empty forever.
442 On a 64-bit platform:
443 In the L2 table:
444 all .base & 0xFFFF == 0
445 all .base > MAX_PRIMARY_ADDRESS
446 In the L1 table:
447 all .base & 0xFFFF == 0
448 all (.base > MAX_PRIMARY_ADDRESS
449 .base & 0xFFFF == 0
450 and .ent points to an AuxMapEnt with the same .base)
452 (.base == 0 and .ent == NULL)
454 *n_secmaps_found = 0;
455 if (sizeof(void*) == 4) {
456 /* 32-bit platform */
457 if (VG_(OSetGen_Size)(auxmap_L2) != 0)
458 return "32-bit: auxmap_L2 is non-empty";
459 for (i = 0; i < N_AUXMAP_L1; i++)
460 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
461 return "32-bit: auxmap_L1 is non-empty";
462 } else {
463 /* 64-bit platform */
464 UWord elems_seen = 0;
465 AuxMapEnt *elem, *res;
466 AuxMapEnt key;
467 /* L2 table */
468 VG_(OSetGen_ResetIter)(auxmap_L2);
469 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
470 elems_seen++;
471 if (0 != (elem->base & (Addr)0xFFFF))
472 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
473 if (elem->base <= MAX_PRIMARY_ADDRESS)
474 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
475 if (elem->sm == NULL)
476 return "64-bit: .sm in _L2 is NULL";
477 if (!is_distinguished_sm(elem->sm))
478 (*n_secmaps_found)++;
480 if (elems_seen != n_auxmap_L2_nodes)
481 return "64-bit: disagreement on number of elems in _L2";
482 /* Check L1-L2 correspondence */
483 for (i = 0; i < N_AUXMAP_L1; i++) {
484 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
485 continue;
486 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
487 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
488 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
489 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
490 if (auxmap_L1[i].ent == NULL)
491 return "64-bit: .ent is NULL in auxmap_L1";
492 if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
493 return "64-bit: _L1 and _L2 bases are inconsistent";
494 /* Look it up in auxmap_L2. */
495 key.base = auxmap_L1[i].base;
496 key.sm = 0;
497 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
498 if (res == NULL)
499 return "64-bit: _L1 .base not found in _L2";
500 if (res != auxmap_L1[i].ent)
501 return "64-bit: _L1 .ent disagrees with _L2 entry";
503 /* Check L1 contains no duplicates */
504 for (i = 0; i < N_AUXMAP_L1; i++) {
505 if (auxmap_L1[i].base == 0)
506 continue;
507 for (j = i+1; j < N_AUXMAP_L1; j++) {
508 if (auxmap_L1[j].base == 0)
509 continue;
510 if (auxmap_L1[j].base == auxmap_L1[i].base)
511 return "64-bit: duplicate _L1 .base entries";
515 return NULL; /* ok */
518 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
520 Word i;
521 tl_assert(ent);
522 tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
523 for (i = N_AUXMAP_L1-1; i > rank; i--)
524 auxmap_L1[i] = auxmap_L1[i-1];
525 auxmap_L1[rank].base = ent->base;
526 auxmap_L1[rank].ent = ent;
529 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
531 AuxMapEnt key;
532 AuxMapEnt* res;
533 Word i;
535 tl_assert(a > MAX_PRIMARY_ADDRESS);
536 a &= ~(Addr)0xFFFF;
538 /* First search the front-cache, which is a self-organising
539 list containing the most popular entries. */
541 if (LIKELY(auxmap_L1[0].base == a))
542 return auxmap_L1[0].ent;
543 if (LIKELY(auxmap_L1[1].base == a)) {
544 Addr t_base = auxmap_L1[0].base;
545 AuxMapEnt* t_ent = auxmap_L1[0].ent;
546 auxmap_L1[0].base = auxmap_L1[1].base;
547 auxmap_L1[0].ent = auxmap_L1[1].ent;
548 auxmap_L1[1].base = t_base;
549 auxmap_L1[1].ent = t_ent;
550 return auxmap_L1[0].ent;
553 n_auxmap_L1_searches++;
555 for (i = 0; i < N_AUXMAP_L1; i++) {
556 if (auxmap_L1[i].base == a) {
557 break;
560 tl_assert(i >= 0 && i <= N_AUXMAP_L1);
562 n_auxmap_L1_cmps += (ULong)(i+1);
564 if (i < N_AUXMAP_L1) {
565 if (i > 0) {
566 Addr t_base = auxmap_L1[i-1].base;
567 AuxMapEnt* t_ent = auxmap_L1[i-1].ent;
568 auxmap_L1[i-1].base = auxmap_L1[i-0].base;
569 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent;
570 auxmap_L1[i-0].base = t_base;
571 auxmap_L1[i-0].ent = t_ent;
572 i--;
574 return auxmap_L1[i].ent;
577 n_auxmap_L2_searches++;
579 /* First see if we already have it. */
580 key.base = a;
581 key.sm = 0;
583 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
584 if (res)
585 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
586 return res;
589 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
591 AuxMapEnt *nyu, *res;
593 /* First see if we already have it. */
594 res = maybe_find_in_auxmap( a );
595 if (LIKELY(res))
596 return res;
598 /* Ok, there's no entry in the secondary map, so we'll have
599 to allocate one. */
600 a &= ~(Addr)0xFFFF;
602 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
603 nyu->base = a;
604 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS];
605 VG_(OSetGen_Insert)( auxmap_L2, nyu );
606 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
607 n_auxmap_L2_nodes++;
608 return nyu;
611 /* --------------- SecMap fundamentals --------------- */
613 // In all these, 'low' means it's definitely in the main primary map,
614 // 'high' means it's definitely in the auxiliary table.
616 static INLINE UWord get_primary_map_low_offset ( Addr a )
618 UWord pm_off = a >> 16;
619 return pm_off;
622 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
624 UWord pm_off = a >> 16;
625 # if VG_DEBUG_MEMORY >= 1
626 tl_assert(pm_off < N_PRIMARY_MAP);
627 # endif
628 return &primary_map[ pm_off ];
631 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
633 AuxMapEnt* am = find_or_alloc_in_auxmap(a);
634 return &am->sm;
637 static INLINE SecMap** get_secmap_ptr ( Addr a )
639 return ( a <= MAX_PRIMARY_ADDRESS
640 ? get_secmap_low_ptr(a)
641 : get_secmap_high_ptr(a));
644 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
646 return *get_secmap_low_ptr(a);
649 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
651 return *get_secmap_high_ptr(a);
654 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
656 SecMap** p = get_secmap_low_ptr(a);
657 if (UNLIKELY(is_distinguished_sm(*p)))
658 *p = copy_for_writing(*p);
659 return *p;
662 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
664 SecMap** p = get_secmap_high_ptr(a);
665 if (UNLIKELY(is_distinguished_sm(*p)))
666 *p = copy_for_writing(*p);
667 return *p;
670 /* Produce the secmap for 'a', either from the primary map or by
671 ensuring there is an entry for it in the aux primary map. The
672 secmap may be a distinguished one as the caller will only want to
673 be able to read it.
675 static INLINE SecMap* get_secmap_for_reading ( Addr a )
677 return ( a <= MAX_PRIMARY_ADDRESS
678 ? get_secmap_for_reading_low (a)
679 : get_secmap_for_reading_high(a) );
682 /* Produce the secmap for 'a', either from the primary map or by
683 ensuring there is an entry for it in the aux primary map. The
684 secmap may not be a distinguished one, since the caller will want
685 to be able to write it. If it is a distinguished secondary, make a
686 writable copy of it, install it, and return the copy instead. (COW
687 semantics).
689 static INLINE SecMap* get_secmap_for_writing ( Addr a )
691 return ( a <= MAX_PRIMARY_ADDRESS
692 ? get_secmap_for_writing_low (a)
693 : get_secmap_for_writing_high(a) );
696 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
697 allocate one if one doesn't already exist. This is used by the
698 leak checker.
700 static SecMap* maybe_get_secmap_for ( Addr a )
702 if (a <= MAX_PRIMARY_ADDRESS) {
703 return get_secmap_for_reading_low(a);
704 } else {
705 AuxMapEnt* am = maybe_find_in_auxmap(a);
706 return am ? am->sm : NULL;
710 /* --------------- Fundamental functions --------------- */
712 static INLINE
713 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
715 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
716 *vabits8 &= ~(0x3 << shift); // mask out the two old bits
717 *vabits8 |= (vabits2 << shift); // mask in the two new bits
720 static INLINE
721 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
723 UInt shift;
724 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
725 shift = (a & 2) << 1; // shift by 0 or 4
726 *vabits8 &= ~(0xf << shift); // mask out the four old bits
727 *vabits8 |= (vabits4 << shift); // mask in the four new bits
730 static INLINE
731 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
733 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
734 vabits8 >>= shift; // shift the two bits to the bottom
735 return 0x3 & vabits8; // mask out the rest
738 static INLINE
739 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
741 UInt shift;
742 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
743 shift = (a & 2) << 1; // shift by 0 or 4
744 vabits8 >>= shift; // shift the four bits to the bottom
745 return 0xf & vabits8; // mask out the rest
748 // Note that these four are only used in slow cases. The fast cases do
749 // clever things like combine the auxmap check (in
750 // get_secmap_{read,writ}able) with alignment checks.
752 // *** WARNING! ***
753 // Any time this function is called, if it is possible that vabits2
754 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
755 // sec-V-bits table must also be set!
756 static INLINE
757 void set_vabits2 ( Addr a, UChar vabits2 )
759 SecMap* sm = get_secmap_for_writing(a);
760 UWord sm_off = SM_OFF(a);
761 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
764 static INLINE
765 UChar get_vabits2 ( Addr a )
767 SecMap* sm = get_secmap_for_reading(a);
768 UWord sm_off = SM_OFF(a);
769 UChar vabits8 = sm->vabits8[sm_off];
770 return extract_vabits2_from_vabits8(a, vabits8);
773 // *** WARNING! ***
774 // Any time this function is called, if it is possible that any of the
775 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
776 // corresponding entry(s) in the sec-V-bits table must also be set!
777 static INLINE
778 UChar get_vabits8_for_aligned_word32 ( Addr a )
780 SecMap* sm = get_secmap_for_reading(a);
781 UWord sm_off = SM_OFF(a);
782 UChar vabits8 = sm->vabits8[sm_off];
783 return vabits8;
786 static INLINE
787 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
789 SecMap* sm = get_secmap_for_writing(a);
790 UWord sm_off = SM_OFF(a);
791 sm->vabits8[sm_off] = vabits8;
795 // Forward declarations
796 static UWord get_sec_vbits8(Addr a);
797 static void set_sec_vbits8(Addr a, UWord vbits8);
799 // Returns False if there was an addressability error.
800 static INLINE
801 Bool set_vbits8 ( Addr a, UChar vbits8 )
803 Bool ok = True;
804 UChar vabits2 = get_vabits2(a);
805 if ( VA_BITS2_NOACCESS != vabits2 ) {
806 // Addressable. Convert in-register format to in-memory format.
807 // Also remove any existing sec V bit entry for the byte if no
808 // longer necessary.
809 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; }
810 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
811 else { vabits2 = VA_BITS2_PARTDEFINED;
812 set_sec_vbits8(a, vbits8); }
813 set_vabits2(a, vabits2);
815 } else {
816 // Unaddressable! Do nothing -- when writing to unaddressable
817 // memory it acts as a black hole, and the V bits can never be seen
818 // again. So we don't have to write them at all.
819 ok = False;
821 return ok;
824 // Returns False if there was an addressability error. In that case, we put
825 // all defined bits into vbits8.
826 static INLINE
827 Bool get_vbits8 ( Addr a, UChar* vbits8 )
829 Bool ok = True;
830 UChar vabits2 = get_vabits2(a);
832 // Convert the in-memory format to in-register format.
833 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; }
834 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
835 else if ( VA_BITS2_NOACCESS == vabits2 ) {
836 *vbits8 = V_BITS8_DEFINED; // Make V bits defined!
837 ok = False;
838 } else {
839 tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
840 *vbits8 = get_sec_vbits8(a);
842 return ok;
846 /* --------------- Secondary V bit table ------------ */
848 // This table holds the full V bit pattern for partially-defined bytes
849 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
850 // memory.
852 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
853 // then overwrite the same address with a fully defined byte, the sec-V-bit
854 // node will not necessarily be removed. This is because checking for
855 // whether removal is necessary would slow down the fast paths.
857 // To avoid the stale nodes building up too much, we periodically (once the
858 // table reaches a certain size) garbage collect (GC) the table by
859 // traversing it and evicting any nodes not having PDB.
860 // If more than a certain proportion of nodes survived, we increase the
861 // table size so that GCs occur less often.
863 // This policy is designed to avoid bad table bloat in the worst case where
864 // a program creates huge numbers of stale PDBs -- we would get this bloat
865 // if we had no GC -- while handling well the case where a node becomes
866 // stale but shortly afterwards is rewritten with a PDB and so becomes
867 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
868 // remove all stale nodes as soon as possible, we just end up re-adding a
869 // lot of them again later. The "sufficiently stale" approach avoids
870 // this. (If a program has many live PDBs, performance will just suck,
871 // there's no way around that.)
873 // Further comments, JRS 14 Feb 2012. It turns out that the policy of
874 // holding on to stale entries for 2 GCs before discarding them can lead
875 // to massive space leaks. So we're changing to an arrangement where
876 // lines are evicted as soon as they are observed to be stale during a
877 // GC. This also has a side benefit of allowing the sufficiently_stale
878 // field to be removed from the SecVBitNode struct, reducing its size by
879 // 8 bytes, which is a substantial space saving considering that the
880 // struct was previously 32 or so bytes, on a 64 bit target.
882 // In order to try and mitigate the problem that the "sufficiently stale"
883 // heuristic was designed to avoid, the table size is allowed to drift
884 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This
885 // means that nodes will exist in the table longer on average, and hopefully
886 // will be deleted and re-added less frequently.
888 // The previous scaling up mechanism (now called STEPUP) is retained:
889 // if residency exceeds 50%, the table is scaled up, although by a
890 // factor sqrt(2) rather than 2 as before. This effectively doubles the
891 // frequency of GCs when there are many PDBs and reduces the tendency of
892 // stale PDBs to reside for long periods in the table.
894 static OSet* secVBitTable;
896 // Stats
897 static ULong sec_vbits_new_nodes = 0;
898 static ULong sec_vbits_updates = 0;
900 // This must be a power of two; this is checked in mc_pre_clo_init().
901 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
902 // a larger address range) they take more space but we can get multiple
903 // partially-defined bytes in one if they are close to each other, reducing
904 // the number of total nodes. In practice sometimes they are clustered (eg.
905 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
906 // row), but often not. So we choose something intermediate.
907 #define BYTES_PER_SEC_VBIT_NODE 16
909 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
910 // more than this many nodes survive a GC.
911 #define STEPUP_SURVIVOR_PROPORTION 0.5
912 #define STEPUP_GROWTH_FACTOR 1.414213562
914 // If the above heuristic doesn't apply, then we may make the table
915 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
916 // this many nodes survive a GC, _and_ the total table size does
917 // not exceed a fixed limit. The numbers are somewhat arbitrary, but
918 // work tolerably well on long Firefox runs. The scaleup ratio of 1.5%
919 // effectively although gradually reduces residency and increases time
920 // between GCs for programs with small numbers of PDBs. The 80000 limit
921 // effectively limits the table size to around 2MB for programs with
922 // small numbers of PDBs, whilst giving a reasonably long lifetime to
923 // entries, to try and reduce the costs resulting from deleting and
924 // re-adding of entries.
925 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
926 #define DRIFTUP_GROWTH_FACTOR 1.015
927 #define DRIFTUP_MAX_SIZE 80000
929 // We GC the table when it gets this many nodes in it, ie. it's effectively
930 // the table size. It can change.
931 static Int secVBitLimit = 1000;
933 // The number of GCs done, used to age sec-V-bit nodes for eviction.
934 // Because it's unsigned, wrapping doesn't matter -- the right answer will
935 // come out anyway.
936 static UInt GCs_done = 0;
938 typedef
939 struct {
940 Addr a;
941 UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
943 SecVBitNode;
945 static OSet* createSecVBitTable(void)
947 OSet* newSecVBitTable;
948 newSecVBitTable = VG_(OSetGen_Create_With_Pool)
949 ( offsetof(SecVBitNode, a),
950 NULL, // use fast comparisons
951 VG_(malloc), "mc.cSVT.1 (sec VBit table)",
952 VG_(free),
953 1000,
954 sizeof(SecVBitNode));
955 return newSecVBitTable;
958 static void gcSecVBitTable(void)
960 OSet* secVBitTable2;
961 SecVBitNode* n;
962 Int i, n_nodes = 0, n_survivors = 0;
964 GCs_done++;
966 // Create the new table.
967 secVBitTable2 = createSecVBitTable();
969 // Traverse the table, moving fresh nodes into the new table.
970 VG_(OSetGen_ResetIter)(secVBitTable);
971 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
972 // Keep node if any of its bytes are non-stale. Using
973 // get_vabits2() for the lookup is not very efficient, but I don't
974 // think it matters.
975 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
976 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
977 // Found a non-stale byte, so keep =>
978 // Insert a copy of the node into the new table.
979 SecVBitNode* n2 =
980 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
981 *n2 = *n;
982 VG_(OSetGen_Insert)(secVBitTable2, n2);
983 break;
988 // Get the before and after sizes.
989 n_nodes = VG_(OSetGen_Size)(secVBitTable);
990 n_survivors = VG_(OSetGen_Size)(secVBitTable2);
992 // Destroy the old table, and put the new one in its place.
993 VG_(OSetGen_Destroy)(secVBitTable);
994 secVBitTable = secVBitTable2;
996 if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
997 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
998 n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
1001 // Increase table size if necessary.
1002 if ((Double)n_survivors
1003 > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
1004 secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
1005 if (VG_(clo_verbosity) > 1)
1006 VG_(message)(Vg_DebugMsg,
1007 "memcheck GC: %d new table size (stepup)\n",
1008 secVBitLimit);
1010 else
1011 if (secVBitLimit < DRIFTUP_MAX_SIZE
1012 && (Double)n_survivors
1013 > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
1014 secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
1015 if (VG_(clo_verbosity) > 1)
1016 VG_(message)(Vg_DebugMsg,
1017 "memcheck GC: %d new table size (driftup)\n",
1018 secVBitLimit);
1022 static UWord get_sec_vbits8(Addr a)
1024 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1025 Int amod = a % BYTES_PER_SEC_VBIT_NODE;
1026 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1027 UChar vbits8;
1028 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1029 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1030 // make it to the secondary V bits table.
1031 vbits8 = n->vbits8[amod];
1032 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1033 return vbits8;
1036 static void set_sec_vbits8(Addr a, UWord vbits8)
1038 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1039 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE;
1040 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1041 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1042 // make it to the secondary V bits table.
1043 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1044 if (n) {
1045 n->vbits8[amod] = vbits8; // update
1046 sec_vbits_updates++;
1047 } else {
1048 // Do a table GC if necessary. Nb: do this before creating and
1049 // inserting the new node, to avoid erroneously GC'ing the new node.
1050 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1051 gcSecVBitTable();
1054 // New node: assign the specific byte, make the rest invalid (they
1055 // should never be read as-is, but be cautious).
1056 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1057 n->a = aAligned;
1058 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1059 n->vbits8[i] = V_BITS8_UNDEFINED;
1061 n->vbits8[amod] = vbits8;
1063 // Insert the new node.
1064 VG_(OSetGen_Insert)(secVBitTable, n);
1065 sec_vbits_new_nodes++;
1067 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1068 if (n_secVBit_nodes > max_secVBit_nodes)
1069 max_secVBit_nodes = n_secVBit_nodes;
1073 /* --------------- Endianness helpers --------------- */
1075 /* Returns the offset in memory of the byteno-th most significant byte
1076 in a wordszB-sized word, given the specified endianness. */
1077 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1078 UWord byteno ) {
1079 return bigendian ? (wordszB-1-byteno) : byteno;
1083 /* --------------- Ignored address ranges --------------- */
/* Denotes the address-error-reportability status for address ranges:
   IAR_NotIgnored:  the usual case -- report errors in this range
   IAR_CommandLine: don't report errors -- from command line setting
   IAR_ClientReq:   don't report errors -- from client request
   IAR_INVALID is a placeholder; this file uses it to initialise a
   variable before a range-map lookup fills it in.
*/
typedef
   enum {
      IAR_INVALID = 99,
      IAR_NotIgnored,
      IAR_CommandLine,
      IAR_ClientReq
   }
   IARKind;
1097 static const HChar* showIARKind ( IARKind iark )
1099 switch (iark) {
1100 case IAR_INVALID: return "INVALID";
1101 case IAR_NotIgnored: return "NotIgnored";
1102 case IAR_CommandLine: return "CommandLine";
1103 case IAR_ClientReq: return "ClientReq";
1104 default: return "???";
1108 // RangeMap<IARKind>
1109 static RangeMap* gIgnoredAddressRanges = NULL;
1111 static void init_gIgnoredAddressRanges ( void )
1113 if (LIKELY(gIgnoredAddressRanges != NULL))
1114 return;
1115 gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
1116 VG_(free), IAR_NotIgnored );
1119 Bool MC_(in_ignored_range) ( Addr a )
1121 if (LIKELY(gIgnoredAddressRanges == NULL))
1122 return False;
1123 UWord how = IAR_INVALID;
1124 UWord key_min = ~(UWord)0;
1125 UWord key_max = (UWord)0;
1126 VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
1127 tl_assert(key_min <= a && a <= key_max);
1128 switch (how) {
1129 case IAR_NotIgnored: return False;
1130 case IAR_CommandLine: return True;
1131 case IAR_ClientReq: return True;
1132 default: break; /* invalid */
1134 VG_(tool_panic)("MC_(in_ignore_range)");
1135 /*NOTREACHED*/
1138 Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB )
1140 if (LIKELY(!MC_(clo_ignore_range_below_sp)))
1141 return False;
1142 tl_assert(szB >= 1 && szB <= 32);
1143 tl_assert(MC_(clo_ignore_range_below_sp__first_offset)
1144 > MC_(clo_ignore_range_below_sp__last_offset));
1145 Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset);
1146 Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset);
1147 if (range_lo >= range_hi) {
1148 /* Bizarre. We have a wraparound situation. What should we do? */
1149 return False; // Play safe
1150 } else {
1151 /* This is the expected case. */
1152 if (range_lo <= a && a + szB - 1 <= range_hi)
1153 return True;
1154 else
1155 return False;
1157 /*NOTREACHED*/
1158 tl_assert(0);
1161 /* Parse two Addrs (in hex) separated by a dash, or fail. */
1163 static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 )
1165 Bool ok = VG_(parse_Addr) (ppc, result1);
1166 if (!ok)
1167 return False;
1168 if (**ppc != '-')
1169 return False;
1170 (*ppc)++;
1171 ok = VG_(parse_Addr) (ppc, result2);
1172 if (!ok)
1173 return False;
1174 return True;
1177 /* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
1178 or fail. */
1180 static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 )
1182 Bool ok = VG_(parse_UInt) (ppc, result1);
1183 if (!ok)
1184 return False;
1185 if (**ppc != '-')
1186 return False;
1187 (*ppc)++;
1188 ok = VG_(parse_UInt) (ppc, result2);
1189 if (!ok)
1190 return False;
1191 return True;
1194 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1195 fail. If they are valid, add them to the global set of ignored
1196 ranges. */
1197 static Bool parse_ignore_ranges ( const HChar* str0 )
1199 init_gIgnoredAddressRanges();
1200 const HChar* str = str0;
1201 const HChar** ppc = &str;
1202 while (1) {
1203 Addr start = ~(Addr)0;
1204 Addr end = (Addr)0;
1205 Bool ok = parse_Addr_pair(ppc, &start, &end);
1206 if (!ok)
1207 return False;
1208 if (start > end)
1209 return False;
1210 VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
1211 if (**ppc == 0)
1212 return True;
1213 if (**ppc != ',')
1214 return False;
1215 (*ppc)++;
1217 /*NOTREACHED*/
1218 return False;
1221 /* Add or remove [start, +len) from the set of ignored ranges. */
1222 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
1224 init_gIgnoredAddressRanges();
1225 const Bool verbose = (VG_(clo_verbosity) > 1);
1226 if (len == 0) {
1227 return False;
1229 if (addRange) {
1230 VG_(bindRangeMap)(gIgnoredAddressRanges,
1231 start, start+len-1, IAR_ClientReq);
1232 if (verbose)
1233 VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
1234 (void*)start, (void*)(start+len-1));
1235 } else {
1236 VG_(bindRangeMap)(gIgnoredAddressRanges,
1237 start, start+len-1, IAR_NotIgnored);
1238 if (verbose)
1239 VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
1240 (void*)start, (void*)(start+len-1));
1242 if (verbose) {
1243 VG_(dmsg)("memcheck: now have %u ranges:\n",
1244 VG_(sizeRangeMap)(gIgnoredAddressRanges));
1245 UInt i;
1246 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
1247 UWord val = IAR_INVALID;
1248 UWord key_min = ~(UWord)0;
1249 UWord key_max = (UWord)0;
1250 VG_(indexRangeMap)( &key_min, &key_max, &val,
1251 gIgnoredAddressRanges, i );
1252 VG_(dmsg)("memcheck: [%u] %016lx-%016lx %s\n",
1253 i, key_min, key_max, showIARKind(val));
1256 return True;
1260 /* --------------- Load/store slow cases. --------------- */
1262 static
1263 __attribute__((noinline))
1264 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
1265 Addr a, SizeT nBits, Bool bigendian )
1267 ULong pessim[4]; /* only used when p-l-ok=yes */
1268 SSizeT szB = nBits / 8;
1269 SSizeT szL = szB / 8; /* Size in Longs (64-bit units) */
1270 SSizeT i, j; /* Must be signed. */
1271 SizeT n_addrs_bad = 0;
1272 Addr ai;
1273 UChar vbits8;
1274 Bool ok;
1276 /* Code below assumes load size is a power of two and at least 64
1277 bits. */
1278 tl_assert((szB & (szB-1)) == 0 && szL > 0);
1280 /* If this triggers, you probably just need to increase the size of
1281 the pessim array. */
1282 tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
1284 for (j = 0; j < szL; j++) {
1285 pessim[j] = V_BITS64_DEFINED;
1286 res[j] = V_BITS64_UNDEFINED;
1289 /* Make up a result V word, which contains the loaded data for
1290 valid addresses and Defined for invalid addresses. Iterate over
1291 the bytes in the word, from the most significant down to the
1292 least. The vbits to return are calculated into vbits128. Also
1293 compute the pessimising value to be used when
1294 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1295 info can be gleaned from the pessim array) but is used as a
1296 cross-check. */
1297 for (j = szL-1; j >= 0; j--) {
1298 ULong vbits64 = V_BITS64_UNDEFINED;
1299 ULong pessim64 = V_BITS64_DEFINED;
1300 UWord long_index = byte_offset_w(szL, bigendian, j);
1301 for (i = 8-1; i >= 0; i--) {
1302 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
1303 ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
1304 ok = get_vbits8(ai, &vbits8);
1305 vbits64 <<= 8;
1306 vbits64 |= vbits8;
1307 if (!ok) n_addrs_bad++;
1308 pessim64 <<= 8;
1309 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1311 res[long_index] = vbits64;
1312 pessim[long_index] = pessim64;
1315 /* In the common case, all the addresses involved are valid, so we
1316 just return the computed V bits and have done. */
1317 if (LIKELY(n_addrs_bad == 0))
1318 return;
1320 /* If there's no possibility of getting a partial-loads-ok
1321 exemption, report the error and quit. */
1322 if (!MC_(clo_partial_loads_ok)) {
1323 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1324 return;
1327 /* The partial-loads-ok excemption might apply. Find out if it
1328 does. If so, don't report an addressing error, but do return
1329 Undefined for the bytes that are out of range, so as to avoid
1330 false negatives. If it doesn't apply, just report an addressing
1331 error in the usual way. */
1333 /* Some code steps along byte strings in aligned chunks
1334 even when there is only a partially defined word at the end (eg,
1335 optimised strlen). This is allowed by the memory model of
1336 modern machines, since an aligned load cannot span two pages and
1337 thus cannot "partially fault".
1339 Therefore, a load from a partially-addressible place is allowed
1340 if all of the following hold:
1341 - the command-line flag is set [by default, it isn't]
1342 - it's an aligned load
1343 - at least one of the addresses in the word *is* valid
1345 Since this suppresses the addressing error, we avoid false
1346 negatives by marking bytes undefined when they come from an
1347 invalid address.
1350 /* "at least one of the addresses is invalid" */
1351 ok = False;
1352 for (j = 0; j < szL; j++)
1353 ok |= pessim[j] != V_BITS64_DEFINED;
1354 tl_assert(ok);
1356 # if defined(VGP_s390x_linux)
1357 tl_assert(szB == 16); // s390 doesn't have > 128 bit SIMD
1358 /* OK if all loaded bytes are from the same page. */
1359 Bool alignedOK = ((a & 0xfff) <= 0x1000 - szB);
1360 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
1361 /* lxvd2x might generate an unaligned 128 bit vector load. */
1362 Bool alignedOK = (szB == 16);
1363 # else
1364 /* OK if the address is aligned by the load size. */
1365 Bool alignedOK = (0 == (a & (szB - 1)));
1366 # endif
1368 if (alignedOK && n_addrs_bad < szB) {
1369 /* Exemption applies. Use the previously computed pessimising
1370 value and return the combined result, but don't flag an
1371 addressing error. The pessimising value is Defined for valid
1372 addresses and Undefined for invalid addresses. */
1373 /* for assumption that doing bitwise or implements UifU */
1374 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1375 /* (really need "UifU" here...)
1376 vbits[j] UifU= pessim[j] (is pessimised by it, iow) */
1377 for (j = szL-1; j >= 0; j--)
1378 res[j] |= pessim[j];
1379 return;
1382 /* Exemption doesn't apply. Flag an addressing error in the normal
1383 way. */
1384 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1387 MC_MAIN_STATIC
1388 __attribute__((noinline))
1389 __attribute__((used))
1390 VG_REGPARM(3)
1391 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian );
1393 MC_MAIN_STATIC
1394 __attribute__((noinline))
1395 __attribute__((used))
1396 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
1397 this function may get called from hand written assembly. */
1398 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1400 PROF_EVENT(MCPE_LOADVN_SLOW);
1402 /* ------------ BEGIN semi-fast cases ------------ */
1403 /* These deal quickly-ish with the common auxiliary primary map
1404 cases on 64-bit platforms. Are merely a speedup hack; can be
1405 omitted without loss of correctness/functionality. Note that in
1406 both cases the "sizeof(void*) == 8" causes these cases to be
1407 folded out by compilers on 32-bit platforms. These are derived
1408 from LOADV64 and LOADV32.
1411 # if defined(VGA_mips64) && defined(VGABI_N32)
1412 if (LIKELY(sizeof(void*) == 4 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1413 # else
1414 if (LIKELY(sizeof(void*) == 8 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1415 # endif
1417 SecMap* sm = get_secmap_for_reading(a);
1418 UWord sm_off16 = SM_OFF_16(a);
1419 UWord vabits16 = sm->vabits16[sm_off16];
1420 if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1421 return V_BITS64_DEFINED;
1422 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1423 return V_BITS64_UNDEFINED;
1424 /* else fall into the slow case */
1427 # if defined(VGA_mips64) && defined(VGABI_N32)
1428 if (LIKELY(sizeof(void*) == 4 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1429 # else
1430 if (LIKELY(sizeof(void*) == 8 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1431 # endif
1433 SecMap* sm = get_secmap_for_reading(a);
1434 UWord sm_off = SM_OFF(a);
1435 UWord vabits8 = sm->vabits8[sm_off];
1436 if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1437 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1438 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1439 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1440 /* else fall into slow case */
1443 /* ------------ END semi-fast cases ------------ */
1445 ULong vbits64 = V_BITS64_UNDEFINED; /* result */
1446 ULong pessim64 = V_BITS64_DEFINED; /* only used when p-l-ok=yes */
1447 SSizeT szB = nBits / 8;
1448 SSizeT i; /* Must be signed. */
1449 SizeT n_addrs_bad = 0;
1450 Addr ai;
1451 UChar vbits8;
1452 Bool ok;
1454 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1456 /* Make up a 64-bit result V word, which contains the loaded data
1457 for valid addresses and Defined for invalid addresses. Iterate
1458 over the bytes in the word, from the most significant down to
1459 the least. The vbits to return are calculated into vbits64.
1460 Also compute the pessimising value to be used when
1461 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1462 info can be gleaned from pessim64) but is used as a
1463 cross-check. */
1464 for (i = szB-1; i >= 0; i--) {
1465 PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
1466 ai = a + byte_offset_w(szB, bigendian, i);
1467 ok = get_vbits8(ai, &vbits8);
1468 vbits64 <<= 8;
1469 vbits64 |= vbits8;
1470 if (!ok) n_addrs_bad++;
1471 pessim64 <<= 8;
1472 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1475 /* In the common case, all the addresses involved are valid, so we
1476 just return the computed V bits and have done. */
1477 if (LIKELY(n_addrs_bad == 0))
1478 return vbits64;
1480 /* If there's no possibility of getting a partial-loads-ok
1481 exemption, report the error and quit. */
1482 if (!MC_(clo_partial_loads_ok)) {
1483 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1484 return vbits64;
1487 /* The partial-loads-ok excemption might apply. Find out if it
1488 does. If so, don't report an addressing error, but do return
1489 Undefined for the bytes that are out of range, so as to avoid
1490 false negatives. If it doesn't apply, just report an addressing
1491 error in the usual way. */
1493 /* Some code steps along byte strings in aligned word-sized chunks
1494 even when there is only a partially defined word at the end (eg,
1495 optimised strlen). This is allowed by the memory model of
1496 modern machines, since an aligned load cannot span two pages and
1497 thus cannot "partially fault". Despite such behaviour being
1498 declared undefined by ANSI C/C++.
1500 Therefore, a load from a partially-addressible place is allowed
1501 if all of the following hold:
1502 - the command-line flag is set [by default, it isn't]
1503 - it's a word-sized, word-aligned load
1504 - at least one of the addresses in the word *is* valid
1506 Since this suppresses the addressing error, we avoid false
1507 negatives by marking bytes undefined when they come from an
1508 invalid address.
1511 /* "at least one of the addresses is invalid" */
1512 tl_assert(pessim64 != V_BITS64_DEFINED);
1514 # if defined(VGA_mips64) && defined(VGABI_N32)
1515 if (szB == VG_WORDSIZE * 2 && VG_IS_WORD_ALIGNED(a)
1516 && n_addrs_bad < VG_WORDSIZE * 2)
1517 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
1518 /* On power unaligned loads of words are OK. */
1519 if (szB == VG_WORDSIZE && n_addrs_bad < VG_WORDSIZE)
1520 # else
1521 if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1522 && n_addrs_bad < VG_WORDSIZE)
1523 # endif
1525 /* Exemption applies. Use the previously computed pessimising
1526 value for vbits64 and return the combined result, but don't
1527 flag an addressing error. The pessimising value is Defined
1528 for valid addresses and Undefined for invalid addresses. */
1529 /* for assumption that doing bitwise or implements UifU */
1530 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1531 /* (really need "UifU" here...)
1532 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1533 vbits64 |= pessim64;
1534 return vbits64;
1537 /* Also, in appears that gcc generates string-stepping code in
1538 32-bit chunks on 64 bit platforms. So, also grant an exception
1539 for this case. Note that the first clause of the conditional
1540 (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1541 will get folded out in 32 bit builds. */
1542 # if defined(VGA_mips64) && defined(VGABI_N32)
1543 if (VG_WORDSIZE == 4
1544 && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4)
1545 # else
1546 if (VG_WORDSIZE == 8
1547 && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4)
1548 # endif
1550 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1551 /* (really need "UifU" here...)
1552 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1553 vbits64 |= pessim64;
1554 /* Mark the upper 32 bits as undefined, just to be on the safe
1555 side. */
1556 vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1557 return vbits64;
1560 /* Exemption doesn't apply. Flag an addressing error in the normal
1561 way. */
1562 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1564 return vbits64;
1568 static
1569 __attribute__((noinline))
1570 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1572 SizeT szB = nBits / 8;
1573 SizeT i, n_addrs_bad = 0;
1574 UChar vbits8;
1575 Addr ai;
1576 Bool ok;
1578 PROF_EVENT(MCPE_STOREVN_SLOW);
1580 /* ------------ BEGIN semi-fast cases ------------ */
1581 /* These deal quickly-ish with the common auxiliary primary map
1582 cases on 64-bit platforms. Are merely a speedup hack; can be
1583 omitted without loss of correctness/functionality. Note that in
1584 both cases the "sizeof(void*) == 8" causes these cases to be
1585 folded out by compilers on 32-bit platforms. The logic below
1586 is somewhat similar to some cases extensively commented in
1587 MC_(helperc_STOREV8).
1589 # if defined(VGA_mips64) && defined(VGABI_N32)
1590 if (LIKELY(sizeof(void*) == 4 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1591 # else
1592 if (LIKELY(sizeof(void*) == 8 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1593 # endif
1595 SecMap* sm = get_secmap_for_reading(a);
1596 UWord sm_off16 = SM_OFF_16(a);
1597 UWord vabits16 = sm->vabits16[sm_off16];
1598 if (LIKELY( !is_distinguished_sm(sm) &&
1599 (VA_BITS16_DEFINED == vabits16 ||
1600 VA_BITS16_UNDEFINED == vabits16) )) {
1601 /* Handle common case quickly: a is suitably aligned, */
1602 /* is mapped, and is addressible. */
1603 // Convert full V-bits in register to compact 2-bit form.
1604 if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1605 sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
1606 return;
1607 } else if (V_BITS64_UNDEFINED == vbytes) {
1608 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
1609 return;
1611 /* else fall into the slow case */
1613 /* else fall into the slow case */
1616 # if defined(VGA_mips64) && defined(VGABI_N32)
1617 if (LIKELY(sizeof(void*) == 4 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1618 # else
1619 if (LIKELY(sizeof(void*) == 8 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1620 # endif
1622 SecMap* sm = get_secmap_for_reading(a);
1623 UWord sm_off = SM_OFF(a);
1624 UWord vabits8 = sm->vabits8[sm_off];
1625 if (LIKELY( !is_distinguished_sm(sm) &&
1626 (VA_BITS8_DEFINED == vabits8 ||
1627 VA_BITS8_UNDEFINED == vabits8) )) {
1628 /* Handle common case quickly: a is suitably aligned, */
1629 /* is mapped, and is addressible. */
1630 // Convert full V-bits in register to compact 2-bit form.
1631 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1632 sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1633 return;
1634 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1635 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1636 return;
1638 /* else fall into the slow case */
1640 /* else fall into the slow case */
1642 /* ------------ END semi-fast cases ------------ */
1644 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1646 /* Dump vbytes in memory, iterating from least to most significant
1647 byte. At the same time establish addressibility of the location. */
1648 for (i = 0; i < szB; i++) {
1649 PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
1650 ai = a + byte_offset_w(szB, bigendian, i);
1651 vbits8 = vbytes & 0xff;
1652 ok = set_vbits8(ai, vbits8);
1653 if (!ok) n_addrs_bad++;
1654 vbytes >>= 8;
1657 /* If an address error has happened, report it. */
1658 if (n_addrs_bad > 0)
1659 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1663 /*------------------------------------------------------------*/
1664 /*--- Setting permissions over address ranges. ---*/
1665 /*------------------------------------------------------------*/
1667 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1668 UWord dsm_num )
1670 UWord sm_off, sm_off16;
1671 UWord vabits2 = vabits16 & 0x3;
1672 SizeT lenA, lenB, len_to_next_secmap;
1673 Addr aNext;
1674 SecMap* sm;
1675 SecMap** sm_ptr;
1676 SecMap* example_dsm;
1678 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);
1680 /* Check the V+A bits make sense. */
1681 tl_assert(VA_BITS16_NOACCESS == vabits16 ||
1682 VA_BITS16_UNDEFINED == vabits16 ||
1683 VA_BITS16_DEFINED == vabits16);
1685 // This code should never write PDBs; ensure this. (See comment above
1686 // set_vabits2().)
1687 tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1689 if (lenT == 0)
1690 return;
1692 if (lenT > 256 * 1024 * 1024) {
1693 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1694 const HChar* s = "unknown???";
1695 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1696 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1697 if (vabits16 == VA_BITS16_DEFINED ) s = "defined";
1698 VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1699 "large range [0x%lx, 0x%lx) (%s)\n",
1700 a, a + lenT, s);
1704 #ifndef PERF_FAST_SARP
1705 /*------------------ debug-only case ------------------ */
1707 // Endianness doesn't matter here because all bytes are being set to
1708 // the same value.
1709 // Nb: We don't have to worry about updating the sec-V-bits table
1710 // after these set_vabits2() calls because this code never writes
1711 // VA_BITS2_PARTDEFINED values.
1712 SizeT i;
1713 for (i = 0; i < lenT; i++) {
1714 set_vabits2(a + i, vabits2);
1716 return;
1718 #endif
1720 /*------------------ standard handling ------------------ */
1722 /* Get the distinguished secondary that we might want
1723 to use (part of the space-compression scheme). */
1724 example_dsm = &sm_distinguished[dsm_num];
1726 // We have to handle ranges covering various combinations of partial and
1727 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1728 // Cases marked with a '*' are common.
1730 // TYPE PARTS USED
1731 // ---- ----------
1732 // * one partial sec-map (p) 1
1733 // - one whole sec-map (P) 2
1735 // * two partial sec-maps (pp) 1,3
1736 // - one partial, one whole sec-map (pP) 1,2
1737 // - one whole, one partial sec-map (Pp) 2,3
1738 // - two whole sec-maps (PP) 2,2
1740 // * one partial, one whole, one partial (pPp) 1,2,3
1741 // - one partial, two whole (pPP) 1,2,2
1742 // - two whole, one partial (PPp) 2,2,3
1743 // - three whole (PPP) 2,2,2
1745 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1746 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1747 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1748 // - N whole (PP...PP) 2,2...2,2
1750 // Break up total length (lenT) into two parts: length in the first
1751 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
1752 aNext = start_of_this_sm(a) + SM_SIZE;
1753 len_to_next_secmap = aNext - a;
1754 if ( lenT <= len_to_next_secmap ) {
1755 // Range entirely within one sec-map. Covers almost all cases.
1756 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
1757 lenA = lenT;
1758 lenB = 0;
1759 } else if (is_start_of_sm(a)) {
1760 // Range spans at least one whole sec-map, and starts at the beginning
1761 // of a sec-map; skip to Part 2.
1762 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
1763 lenA = 0;
1764 lenB = lenT;
1765 goto part2;
1766 } else {
1767 // Range spans two or more sec-maps, first one is partial.
1768 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
1769 lenA = len_to_next_secmap;
1770 lenB = lenT - lenA;
1773 //------------------------------------------------------------------------
1774 // Part 1: Deal with the first sec_map. Most of the time the range will be
1775 // entirely within a sec_map and this part alone will suffice. Also,
1776 // doing it this way lets us avoid repeatedly testing for the crossing of
1777 // a sec-map boundary within these loops.
1778 //------------------------------------------------------------------------
1780 // If it's distinguished, make it undistinguished if necessary.
1781 sm_ptr = get_secmap_ptr(a);
1782 if (is_distinguished_sm(*sm_ptr)) {
1783 if (*sm_ptr == example_dsm) {
1784 // Sec-map already has the V+A bits that we want, so skip.
1785 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
1786 a = aNext;
1787 lenA = 0;
1788 } else {
1789 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
1790 *sm_ptr = copy_for_writing(*sm_ptr);
1793 sm = *sm_ptr;
1795 // 1 byte steps
1796 while (True) {
1797 if (VG_IS_8_ALIGNED(a)) break;
1798 if (lenA < 1) break;
1799 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
1800 sm_off = SM_OFF(a);
1801 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1802 a += 1;
1803 lenA -= 1;
1805 // 8-aligned, 8 byte steps
1806 while (True) {
1807 if (lenA < 8) break;
1808 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
1809 sm_off16 = SM_OFF_16(a);
1810 sm->vabits16[sm_off16] = vabits16;
1811 a += 8;
1812 lenA -= 8;
1814 // 1 byte steps
1815 while (True) {
1816 if (lenA < 1) break;
1817 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
1818 sm_off = SM_OFF(a);
1819 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1820 a += 1;
1821 lenA -= 1;
1824 // We've finished the first sec-map. Is that it?
1825 if (lenB == 0)
1826 return;
1828 //------------------------------------------------------------------------
1829 // Part 2: Fast-set entire sec-maps at a time.
1830 //------------------------------------------------------------------------
1831 part2:
1832 // 64KB-aligned, 64KB steps.
1833 // Nb: we can reach here with lenB < SM_SIZE
1834 tl_assert(0 == lenA);
1835 while (True) {
1836 if (lenB < SM_SIZE) break;
1837 tl_assert(is_start_of_sm(a));
1838 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
1839 sm_ptr = get_secmap_ptr(a);
1840 if (!is_distinguished_sm(*sm_ptr)) {
1841 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
1842 // Free the non-distinguished sec-map that we're replacing. This
1843 // case happens moderately often, enough to be worthwhile.
1844 SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1845 tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1847 update_SM_counts(*sm_ptr, example_dsm);
1848 // Make the sec-map entry point to the example DSM
1849 *sm_ptr = example_dsm;
1850 lenB -= SM_SIZE;
1851 a += SM_SIZE;
1854 // We've finished the whole sec-maps. Is that it?
1855 if (lenB == 0)
1856 return;
1858 //------------------------------------------------------------------------
1859 // Part 3: Finish off the final partial sec-map, if necessary.
1860 //------------------------------------------------------------------------
1862 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1864 // If it's distinguished, make it undistinguished if necessary.
1865 sm_ptr = get_secmap_ptr(a);
1866 if (is_distinguished_sm(*sm_ptr)) {
1867 if (*sm_ptr == example_dsm) {
1868 // Sec-map already has the V+A bits that we want, so stop.
1869 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
1870 return;
1871 } else {
1872 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
1873 *sm_ptr = copy_for_writing(*sm_ptr);
1876 sm = *sm_ptr;
1878 // 8-aligned, 8 byte steps
1879 while (True) {
1880 if (lenB < 8) break;
1881 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
1882 sm_off16 = SM_OFF_16(a);
1883 sm->vabits16[sm_off16] = vabits16;
1884 a += 8;
1885 lenB -= 8;
1887 // 1 byte steps
1888 while (True) {
1889 if (lenB < 1) return;
1890 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
1891 sm_off = SM_OFF(a);
1892 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1893 a += 1;
1894 lenB -= 1;
1899 /* --- Set permissions for arbitrary address ranges --- */
1901 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1903 PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
1904 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1905 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1906 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1907 ocache_sarp_Clear_Origins ( a, len );
1910 static void make_mem_undefined ( Addr a, SizeT len )
1912 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
1913 DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1914 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1917 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1919 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
1920 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1921 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1922 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1923 ocache_sarp_Set_Origins ( a, len, otag );
1926 static
1927 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1928 ThreadId tid, UInt okind )
1930 UInt ecu;
1931 ExeContext* here;
1932 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1933 if it is invalid. So no need to do it here. */
1934 tl_assert(okind <= 3);
1935 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1936 tl_assert(here);
1937 ecu = VG_(get_ECU_from_ExeContext)(here);
1938 tl_assert(VG_(is_plausible_ECU)(ecu));
1939 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1942 static
1943 void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
1945 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1948 static
1949 void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
1951 MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1954 void MC_(make_mem_defined) ( Addr a, SizeT len )
1956 PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
1957 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1958 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1959 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1960 ocache_sarp_Clear_Origins ( a, len );
1963 __attribute__((unused))
1964 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
1966 MC_(make_mem_defined)(a, len);
1969 /* For each byte in [a,a+len), if the byte is addressable, make it be
1970 defined, but if it isn't addressible, leave it alone. In other
1971 words a version of MC_(make_mem_defined) that doesn't mess with
1972 addressibility. Low-performance implementation. */
1973 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1975 SizeT i;
1976 UChar vabits2;
1977 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1978 for (i = 0; i < len; i++) {
1979 vabits2 = get_vabits2( a+i );
1980 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1981 set_vabits2(a+i, VA_BITS2_DEFINED);
1982 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1983 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1989 /* Similarly (needed for mprotect handling ..) */
1990 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1992 SizeT i;
1993 UChar vabits2;
1994 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1995 for (i = 0; i < len; i++) {
1996 vabits2 = get_vabits2( a+i );
1997 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1998 set_vabits2(a+i, VA_BITS2_DEFINED);
1999 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
2000 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
2006 /* --- Block-copy permissions (needed for implementing realloc() and
2007 sys_mremap). --- */
2009 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
2011 SizeT i, j;
2012 UChar vabits2, vabits8;
2013 Bool aligned, nooverlap;
2015 DEBUG("MC_(copy_address_range_state)\n");
2016 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);
2018 if (len == 0 || src == dst)
2019 return;
2021 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
2022 nooverlap = src+len <= dst || dst+len <= src;
2024 if (nooverlap && aligned) {
2026 /* Vectorised fast case, when no overlap and suitably aligned */
2027 /* vector loop */
2028 i = 0;
2029 while (len >= 4) {
2030 vabits8 = get_vabits8_for_aligned_word32( src+i );
2031 set_vabits8_for_aligned_word32( dst+i, vabits8 );
2032 if (LIKELY(VA_BITS8_DEFINED == vabits8
2033 || VA_BITS8_UNDEFINED == vabits8
2034 || VA_BITS8_NOACCESS == vabits8)) {
2035 /* do nothing */
2036 } else {
2037 /* have to copy secondary map info */
2038 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
2039 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
2040 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
2041 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
2042 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
2043 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
2044 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
2045 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
2047 i += 4;
2048 len -= 4;
2050 /* fixup loop */
2051 while (len >= 1) {
2052 vabits2 = get_vabits2( src+i );
2053 set_vabits2( dst+i, vabits2 );
2054 if (VA_BITS2_PARTDEFINED == vabits2) {
2055 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
2057 i++;
2058 len--;
2061 } else {
2063 /* We have to do things the slow way */
2064 if (src < dst) {
2065 for (i = 0, j = len-1; i < len; i++, j--) {
2066 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
2067 vabits2 = get_vabits2( src+j );
2068 set_vabits2( dst+j, vabits2 );
2069 if (VA_BITS2_PARTDEFINED == vabits2) {
2070 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
2075 if (src > dst) {
2076 for (i = 0; i < len; i++) {
2077 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
2078 vabits2 = get_vabits2( src+i );
2079 set_vabits2( dst+i, vabits2 );
2080 if (VA_BITS2_PARTDEFINED == vabits2) {
2081 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
2090 /*------------------------------------------------------------*/
2091 /*--- Origin tracking stuff - cache basics ---*/
2092 /*------------------------------------------------------------*/
2094 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
2095 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2097 Note that this implementation draws inspiration from the "origin
2098 tracking by value piggybacking" scheme described in "Tracking Bad
2099 Apples: Reporting the Origin of Null and Undefined Value Errors"
2100 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
2101 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
2102 implemented completely differently.
2104 Origin tags and ECUs -- about the shadow values
2105 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2107 This implementation tracks the defining point of all uninitialised
2108 values using so called "origin tags", which are 32-bit integers,
2109 rather than using the values themselves to encode the origins. The
2110 latter, so-called "value piggybacking", is what the OOPSLA07 paper
2111 describes.
2113 Origin tags, as tracked by the machinery below, are 32-bit unsigned
2114 ints (UInts), regardless of the machine's word size. Each tag
2115 comprises an upper 30-bit ECU field and a lower 2-bit
2116 'kind' field. The ECU field is a number given out by m_execontext
2117 and has a 1-1 mapping with ExeContext*s. An ECU can be used
2118 directly as an origin tag (otag), but in fact we want to put
2119 additional information in the 'kind' field to indicate roughly where the
2120 tag came from. This helps print more understandable error messages
2121 for the user -- it has no other purpose. In summary:
2123 * Both ECUs and origin tags are represented as 32-bit words
2125 * m_execontext and the core-tool interface deal purely in ECUs.
2126 They have no knowledge of origin tags - that is a purely
2127 Memcheck-internal matter.
2129 * all valid ECUs have the lowest 2 bits zero and at least
2130 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2132 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2133 constants defined in mc_include.h.
2135 * to convert an otag back to an ECU, AND it with ~3
2137 One important fact is that no valid otag is zero. A zero otag is
2138 used by the implementation to indicate "no origin", which could
2139 mean that either the value is defined, or it is undefined but the
2140 implementation somehow managed to lose the origin.
2142 The ECU used for memory created by malloc etc is derived from the
2143 stack trace at the time the malloc etc happens. This means the
2144 mechanism can show the exact allocation point for heap-created
2145 uninitialised values.
2147 In contrast, it is simply too expensive to create a complete
2148 backtrace for each stack allocation. Therefore we merely use a
2149 depth-1 backtrace for stack allocations, which can be done once at
2150 translation time, rather than N times at run time. The result of
2151 this is that, for stack created uninitialised values, Memcheck can
2152 only show the allocating function, and not what called it.
2153 Furthermore, compilers tend to move the stack pointer just once at
2154 the start of the function, to allocate all locals, and so in fact
2155 the stack origin almost always simply points to the opening brace
2156 of the function. Net result is, for stack origins, the mechanism
2157 can tell you in which function the undefined value was created, but
2158 that's all. Users will need to carefully check all locals in the
2159 specified function.
2161 Shadowing registers and memory
2162 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2164 Memory is shadowed using a two level cache structure (ocacheL1 and
2165 ocacheL2). Memory references are first directed to ocacheL1. This
2166 is a traditional 2-way set associative cache with 32-byte lines and
2167 approximate LRU replacement within each set.
2169 A naive implementation would require storing one 32 bit otag for
2170 each byte of memory covered, a 4:1 space overhead. Instead, there
2171 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2172 that shows which of the 4 bytes have that shadow value and which
2173 have a shadow value of zero (indicating no origin). Hence a lot of
2174 space is saved, but the cost is that only one different origin per
2175 4 bytes of address space can be represented. This is a source of
2176 imprecision, but how much of a problem it really is remains to be
2177 seen.
2179 A cache line that contains all zeroes ("no origins") contains no
2180 useful information, and can be ejected from the L1 cache "for
2181 free", in the sense that a read miss on the L1 causes a line of
2182 zeroes to be installed. However, ejecting a line containing
2183 nonzeroes risks losing origin information permanently. In order to
2184 prevent such lossage, ejected nonzero lines are placed in a
2185 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2186 lines. This can grow arbitrarily large, and so should ensure that
2187 Memcheck runs out of memory in preference to losing useful origin
2188 info due to cache size limitations.
2190 Shadowing registers is a bit tricky, because the shadow values are
2191 32 bits, regardless of the size of the register. That gives a
2192 problem for registers smaller than 32 bits. The solution is to
2193 find spaces in the guest state that are unused, and use those to
2194 shadow guest state fragments smaller than 32 bits. For example, on
2195 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
2196 shadow are allocated for the register's otag, then there are still
2197 12 bytes left over which could be used to shadow 3 other values.
2199 This implies there is some non-obvious mapping from guest state
2200 (start,length) pairs to the relevant shadow offset (for the origin
2201 tags). And it is unfortunately guest-architecture specific. The
2202 mapping is contained in mc_machine.c, which is quite lengthy but
2203 straightforward.
2205 Instrumenting the IR
2206 ~~~~~~~~~~~~~~~~~~~~
2208 Instrumentation is largely straightforward, and done by the
2209 functions schemeE and schemeS in mc_translate.c. These generate
2210 code for handling the origin tags of expressions (E) and statements
2211 (S) respectively. The rather strange names are a reference to the
2212 "compilation schemes" shown in Simon Peyton Jones' book "The
2213 Implementation of Functional Programming Languages" (Prentice Hall,
2214 1987, see
2215 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2217 schemeS merely arranges to move shadow values around the guest
2218 state to track the incoming IR. schemeE is largely trivial too.
2219 The only significant point is how to compute the otag corresponding
2220 to binary (or ternary, quaternary, etc) operator applications. The
2221 rule is simple: just take whichever value is larger (32-bit
2222 unsigned max). Constants get the special value zero. Hence this
2223 rule always propagates a nonzero (known) otag in preference to a
2224 zero (unknown, or more likely, value-is-defined) tag, as we want.
2225 If two different undefined values are inputs to a binary operator
2226 application, then which is propagated is arbitrary, but that
2227 doesn't matter, since the program is erroneous in using either of
2228 the values, and so there's no point in attempting to propagate
2229 both.
2231 Since constants are abstracted to (otag) zero, much of the
2232 instrumentation code can be folded out without difficulty by the
2233 generic post-instrumentation IR cleanup pass, using these rules:
2234 Max32U(0,x) -> x, Max32U(x,0) -> x, Max32U(x,y) where x and y are
2235 constants is evaluated at JIT time. And the resulting dead code
2236 removal. In practice this causes surprisingly few Max32Us to
2237 survive through to backend code generation.
2239 Integration with the V-bits machinery
2240 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2242 This is again largely straightforward. Mostly the otag and V bits
2243 stuff are independent. The only point of interaction is when the V
2244 bits instrumenter creates a call to a helper function to report an
2245 uninitialised value error -- in that case it must first use schemeE
2246 to get hold of the origin tag expression for the value, and pass
2247 that to the helper too.
2249 There is the usual stuff to do with setting address range
2250 permissions. When memory is painted undefined, we must also know
2251 the origin tag to paint with, which involves some tedious plumbing,
2252 particularly to do with the fast case stack handlers. When memory
2253 is painted defined or noaccess then the origin tags must be forced
2254 to zero.
2256 One of the goals of the implementation was to ensure that the
2257 non-origin tracking mode isn't slowed down at all. To do this,
2258 various functions to do with memory permissions setting (again,
2259 mostly pertaining to the stack) are duplicated for the with- and
2260 without-otag case.
2262 Dealing with stack redzones, and the NIA cache
2263 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2265 This is one of the few non-obvious parts of the implementation.
2267 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2268 reserved area below the stack pointer, that can be used as scratch
2269 space by compiler generated code for functions. In the Memcheck
2270 sources this is referred to as the "stack redzone". The important
2271 thing here is that such redzones are considered volatile across
2272 function calls and returns. So Memcheck takes care to mark them as
2273 undefined for each call and return, on the afflicted platforms.
2274 Past experience shows this is essential in order to get reliable
2275 messages about uninitialised values that come from the stack.
2277 So the question is, when we paint a redzone undefined, what origin
2278 tag should we use for it? Consider a function f() calling g(). If
2279 we paint the redzone using an otag derived from the ExeContext of
2280 the CALL/BL instruction in f, then any errors in g causing it to
2281 use uninitialised values that happen to lie in the redzone, will be
2282 reported as having their origin in f. Which is highly confusing.
2284 The same applies for returns: if, on a return, we paint the redzone
2285 using an origin tag derived from the ExeContext of the RET/BLR
2286 instruction in g, then any later errors in f causing it to use
2287 uninitialised values in the redzone, will be reported as having
2288 their origin in g. Which is just as confusing.
2290 To do it right, in both cases we need to use an origin tag which
2291 pertains to the instruction which dynamically follows the CALL/BL
2292 or RET/BLR. In short, one derived from the NIA - the "next
2293 instruction address".
2295 To make this work, Memcheck's redzone-painting helper,
2296 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2297 NIA. It converts the NIA to a 1-element ExeContext, and uses that
2298 ExeContext's ECU as the basis for the otag used to paint the
2299 redzone. The expensive part of this is converting an NIA into an
2300 ECU, since this happens once for every call and every return. So
2301 we use a simple 511-line, 2-way set associative cache
2302 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2303 the cost out.
2305 Further background comments
2306 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2308 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
2309 > it really just the address of the relevant ExeContext?
2311 Well, it's not the address, but a value which has a 1-1 mapping
2312 with ExeContexts, and is guaranteed not to be zero, since zero
2313 denotes (to memcheck) "unknown origin or defined value". So these
2314 UInts are just numbers starting at 4 and incrementing by 4; each
2315 ExeContext is given a number when it is created. (*** NOTE this
2316 confuses otags and ECUs; see comments above ***).
2318 Making these otags 32-bit regardless of the machine's word size
2319 makes the 64-bit implementation easier (next para). And it doesn't
2320 really limit us in any way, since for the tags to overflow would
2321 require that the program somehow caused 2^30-1 different
2322 ExeContexts to be created, in which case it is probably in deep
2323 trouble. Not to mention V will have soaked up many tens of
2324 gigabytes of memory merely to store them all.
2326 So having 64-bit origins doesn't really buy you anything, and has
2327 the following downsides:
2329 Suppose that instead, an otag is a UWord. This would mean that, on
2330 a 64-bit target,
2332 1. It becomes hard to shadow any element of guest state which is
2333 smaller than 8 bytes. To do so means you'd need to find some
2334 8-byte-sized hole in the guest state which you don't want to
2335 shadow, and use that instead to hold the otag. On ppc64, the
2336 condition code register(s) are split into 20 UChar sized pieces,
2337 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2338 and so that would entail finding 160 bytes somewhere else in the
2339 guest state.
2341 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2342 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2343 same) and so I had to look for 4 untracked otag-sized areas in
2344 the guest state to make that possible.
2346 The same problem exists of course when origin tags are only 32
2347 bits, but it's less extreme.
2349 2. (More compelling) it doubles the size of the origin shadow
2350 memory. Given that the shadow memory is organised as a fixed
2351 size cache, and that accuracy of tracking is limited by origins
2352 falling out of the cache due to space conflicts, this isn't good.
2354 > Another question: is the origin tracking perfect, or are there
2355 > cases where it fails to determine an origin?
2357 It is imperfect for at least the following reasons, and
2358 probably more:
2360 * Insufficient capacity in the origin cache. When a line is
2361 evicted from the cache it is gone forever, and so subsequent
2362 queries for the line produce zero, indicating no origin
2363 information. Interestingly, a line containing all zeroes can be
2364 evicted "free" from the cache, since it contains no useful
2365 information, so there is scope perhaps for some cleverer cache
2366 management schemes. (*** NOTE, with the introduction of the
2367 second level origin tag cache, ocacheL2, this is no longer a
2368 problem. ***)
2370 * The origin cache only stores one otag per 32-bits of address
2371 space, plus 4 bits indicating which of the 4 bytes has that tag
2372 and which are considered defined. The result is that if two
2373 undefined bytes in the same word are stored in memory, the first
2374 stored byte's origin will be lost and replaced by the origin for
2375 the second byte.
2377 * Nonzero origin tags for defined values. Consider a binary
2378 operator application op(x,y). Suppose y is undefined (and so has
2379 a valid nonzero origin tag), and x is defined, but erroneously
2380 has a nonzero origin tag (defined values should have tag zero).
2381 If the erroneous tag has a numeric value greater than y's tag,
2382 then the rule for propagating origin tags through binary
2383 operations, which is simply to take the unsigned max of the two
2384 tags, will erroneously propagate x's tag rather than y's.
2386 * Some obscure uses of x86/amd64 byte registers can cause lossage
2387 or confusion of origins. %AH .. %DH are treated as different
2388 from, and unrelated to, their parent registers, %EAX .. %EDX.
2389 So some weird sequences like
2391 movb undefined-value, %AH
2392 movb defined-value, %AL
2393 .. use %AX or %EAX ..
2395 will cause the origin attributed to %AH to be ignored, since %AL,
2396 %AX, %EAX are treated as the same register, and %AH as a
2397 completely separate one.
2399 But having said all that, it actually seems to work fairly well in
2400 practice.
2401 */
2403 static UWord stats_ocacheL1_find = 0;
2404 static UWord stats_ocacheL1_found_at_1 = 0;
2405 static UWord stats_ocacheL1_found_at_N = 0;
2406 static UWord stats_ocacheL1_misses = 0;
2407 static UWord stats_ocacheL1_lossage = 0;
2408 static UWord stats_ocacheL1_movefwds = 0;
2410 static UWord stats__ocacheL2_finds = 0;
2411 static UWord stats__ocacheL2_adds = 0;
2412 static UWord stats__ocacheL2_dels = 0;
2413 static UWord stats__ocacheL2_misses = 0;
2414 static UWord stats__ocacheL2_n_nodes_max = 0;
/* Cache of 32-bit values, one every 32 bits of address space */

/* log2 of the number of address-space bytes covered by one cache line
   (5, i.e. 32-byte lines). */
#define OC_BITS_PER_LINE 5

/* Number of 32-bit origin words held in one cache line. */
#define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2421 static INLINE UWord oc_line_offset ( Addr a ) {
2422 return (a >> 2) & (OC_W32S_PER_LINE - 1);
2424 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2425 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2428 #define OC_LINES_PER_SET 2
2430 #define OC_N_SET_BITS 20
2431 #define OC_N_SETS (1 << OC_N_SET_BITS)
2433 /* These settings give:
2434 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2435 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
2438 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2441 /* Originally (pre Dec 2021) it was the case that this code had a
2442 parameterizable cache line size, set by changing OC_BITS_PER_LINE.
2443 However, as a result of the speedup fixes necessitated by bug 446103, that
2444 is no longer really the case, and much of the L1 and L2 cache code has been
2445 tuned specifically for the case OC_BITS_PER_LINE == 5 (that is, the line
2446 size is 32 bytes). Changing that would require a bunch of re-tuning
2447 effort. So let's set it in stone for now. */
2448 STATIC_ASSERT(OC_BITS_PER_LINE == 5);
2449 STATIC_ASSERT(OC_LINES_PER_SET == 2);
2451 /* Fundamentally we want an OCacheLine structure (see below) as follows:
2452 struct {
2453 Addr tag;
2454 UInt w32 [OC_W32S_PER_LINE];
2455 UChar descr[OC_W32S_PER_LINE];
2457 However, in various places, we want to set the w32[] and descr[] arrays to
2458 zero, or check if they are zero. This can be a very hot path (per bug
2459 446103). So, instead, we have a union which is either those two arrays
2460 (OCacheLine_Main) or simply an array of ULongs (OCacheLine_W64s). For the
2461 set-zero/test-zero operations, the OCacheLine_W64s are used.
2464 // To ensure that OCacheLine.descr[] will fit in an integral number of ULongs.
2465 STATIC_ASSERT(0 == (OC_W32S_PER_LINE % 8));
2467 #define OC_W64S_PER_MAIN /* "MAIN" meaning "struct OCacheLine_Main" */ \
2468 (OC_W32S_PER_LINE / 2 /* covers OCacheLine_Main.w32[] */ \
2469 + OC_W32S_PER_LINE / 8) /* covers OCacheLine_Main.descr[] */
2470 STATIC_ASSERT(OC_W64S_PER_MAIN == 5);
2472 typedef
2473 ULong OCacheLine_W64s[OC_W64S_PER_MAIN];
2475 typedef
2476 struct {
2477 UInt w32 [OC_W32S_PER_LINE];
2478 UChar descr[OC_W32S_PER_LINE];
2480 OCacheLine_Main;
2482 STATIC_ASSERT(sizeof(OCacheLine_W64s) == sizeof(OCacheLine_Main));
2484 typedef
2485 struct {
2486 Addr tag;
2487 union {
2488 OCacheLine_W64s w64s;
2489 OCacheLine_Main main;
2490 } u;
2492 OCacheLine;
2494 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2495 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2496 and 'z' if all the represented tags are zero. */
2497 static inline UChar classify_OCacheLine ( OCacheLine* line )
2499 UWord i;
2500 if (line->tag == 1/*invalid*/)
2501 return 'e'; /* EMPTY */
2502 tl_assert(is_valid_oc_tag(line->tag));
2504 // BEGIN fast special-case of the test loop below. This will detect
2505 // zero-ness (case 'z') for a subset of cases that the loop below will,
2506 // hence is safe.
2507 if (OC_W64S_PER_MAIN == 5) {
2508 if (line->u.w64s[0] == 0
2509 && line->u.w64s[1] == 0 && line->u.w64s[2] == 0
2510 && line->u.w64s[3] == 0 && line->u.w64s[4] == 0) {
2511 return 'z';
2513 } else {
2514 tl_assert2(0, "unsupported line size (classify_OCacheLine)");
2516 // END fast special-case of the test loop below.
2518 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2519 tl_assert(0 == ((~0xF) & line->u.main.descr[i]));
2520 if (line->u.main.w32[i] > 0 && line->u.main.descr[i] > 0)
2521 return 'n'; /* NONZERO - contains useful info */
2523 return 'z'; /* ZERO - no useful info */
2526 typedef
2527 struct {
2528 OCacheLine line[OC_LINES_PER_SET];
2530 OCacheSet;
2532 typedef
2533 struct {
2534 OCacheSet set[OC_N_SETS];
2536 OCache;
2538 static OCache* ocacheL1 = NULL;
2539 static UWord ocacheL1_event_ctr = 0;
2541 static void init_ocacheL2 ( void ); /* fwds */
2542 static void init_OCache ( void )
2544 UWord line, set;
2545 tl_assert(MC_(clo_mc_level) >= 3);
2546 tl_assert(ocacheL1 == NULL);
2547 SysRes sres = VG_(am_shadow_alloc)(sizeof(OCache));
2548 if (sr_isError(sres)) {
2549 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2550 sizeof(OCache), sr_Err(sres) );
2552 ocacheL1 = (void *)(Addr)sr_Res(sres);
2553 tl_assert(ocacheL1 != NULL);
2554 for (set = 0; set < OC_N_SETS; set++) {
2555 for (line = 0; line < OC_LINES_PER_SET; line++) {
2556 ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2559 init_ocacheL2();
2562 static inline void moveLineForwards ( OCacheSet* set, UWord lineno )
2564 OCacheLine tmp;
2565 stats_ocacheL1_movefwds++;
2566 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2567 tmp = set->line[lineno-1];
2568 set->line[lineno-1] = set->line[lineno];
2569 set->line[lineno] = tmp;
2572 static inline void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2573 UWord i;
2574 if (OC_W32S_PER_LINE == 8) {
2575 // BEGIN fast special-case of the loop below
2576 tl_assert(OC_W64S_PER_MAIN == 5);
2577 line->u.w64s[0] = 0;
2578 line->u.w64s[1] = 0;
2579 line->u.w64s[2] = 0;
2580 line->u.w64s[3] = 0;
2581 line->u.w64s[4] = 0;
2582 // END fast special-case of the loop below
2583 } else {
2584 tl_assert2(0, "unsupported line size (zeroise_OCacheLine)");
2585 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2586 line->u.main.w32[i] = 0; /* NO ORIGIN */
2587 line->u.main.descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2590 line->tag = tag;
2593 //////////////////////////////////////////////////////////////
2594 //// OCache backing store
2596 // The backing store for ocacheL1 is, conceptually, an AVL tree of lines that
2597 // got ejected from the L1 (a "victim cache"), and which actually contain
2598 // useful info -- that is, for which classify_OCacheLine would return 'n' and
2599 // no other value. However, the tree can grow large, and searching/updating
2600 // it can be hot paths. Hence we "take out" 12 significant bits of the key by
2601 // having 4096 trees, and select one using HASH_OCACHE_TAG.
2603 // What that hash function returns isn't important so long as it is a pure
2604 // function of the tag values, and is < 4096. However, it is critical for
2605 // performance of long SARPs. Hence the extra shift of 11 bits. This means
2606 // each tree conceptually is assigned to contiguous sequences of 2048 lines in
2607 // the "line address space", giving some locality of reference when scanning
2608 // linearly through address space, as is done by a SARP. Changing that 11 to
2609 // 0 gives terrible performance on long SARPs, presumably because each new
2610 // line is in a different tree, hence we wind up thrashing the (CPU's) caches.
2612 // On 32-bit targets, we have to be a bit careful not to shift out so many
2613 // bits that not all 2^12 trees get used. That leads to the constraint
2614 // (OC_BITS_PER_LINE + 11 + 12) < 32. Note that the 11 is the only thing we
2615 // can change here. In this case we have OC_BITS_PER_LINE == 5, hence the
2616 // inequality is (28 < 32) and so we're good.
2618 // The value 11 was determined empirically from various Firefox runs. 10 or
2619 // 12 also work pretty well.
2621 static OSet* ocachesL2[4096];
2623 STATIC_ASSERT((OC_BITS_PER_LINE + 11 + 12) < 32);
2624 static inline UInt HASH_OCACHE_TAG ( Addr tag ) {
2625 return (UInt)((tag >> (OC_BITS_PER_LINE + 11)) & 0xFFF);
2628 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2629 return VG_(malloc)(cc, szB);
2631 static void ocacheL2_free ( void* v ) {
2632 VG_(free)( v );
2635 /* Stats: # nodes currently in tree */
2636 static UWord stats__ocacheL2_n_nodes = 0;
2638 static void init_ocacheL2 ( void )
2640 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2641 tl_assert(0 == offsetof(OCacheLine,tag));
2642 for (UInt i = 0; i < 4096; i++) {
2643 tl_assert(!ocachesL2[i]);
2644 ocachesL2[i]
2645 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2646 NULL, /* fast cmp */
2647 ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2649 stats__ocacheL2_n_nodes = 0;
2652 /* Find line with the given tag in the tree, or NULL if not found. */
2653 static inline OCacheLine* ocacheL2_find_tag ( Addr tag )
2655 OCacheLine* line;
2656 tl_assert(is_valid_oc_tag(tag));
2657 stats__ocacheL2_finds++;
2658 OSet* oset = ocachesL2[HASH_OCACHE_TAG(tag)];
2659 line = VG_(OSetGen_Lookup)( oset, &tag );
2660 return line;
2663 /* Delete the line with the given tag from the tree, if it is present, and
2664 free up the associated memory. */
2665 static void ocacheL2_del_tag ( Addr tag )
2667 OCacheLine* line;
2668 tl_assert(is_valid_oc_tag(tag));
2669 stats__ocacheL2_dels++;
2670 OSet* oset = ocachesL2[HASH_OCACHE_TAG(tag)];
2671 line = VG_(OSetGen_Remove)( oset, &tag );
2672 if (line) {
2673 VG_(OSetGen_FreeNode)(oset, line);
2674 tl_assert(stats__ocacheL2_n_nodes > 0);
2675 stats__ocacheL2_n_nodes--;
2679 /* Add a copy of the given line to the tree. It must not already be
2680 present. */
2681 static void ocacheL2_add_line ( OCacheLine* line )
2683 OCacheLine* copy;
2684 tl_assert(is_valid_oc_tag(line->tag));
2685 OSet* oset = ocachesL2[HASH_OCACHE_TAG(line->tag)];
2686 copy = VG_(OSetGen_AllocNode)( oset, sizeof(OCacheLine) );
2687 *copy = *line;
2688 stats__ocacheL2_adds++;
2689 VG_(OSetGen_Insert)( oset, copy );
2690 stats__ocacheL2_n_nodes++;
2691 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2692 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2695 ////
2696 //////////////////////////////////////////////////////////////
2698 __attribute__((noinline))
2699 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2701 OCacheLine *victim, *inL2;
2702 UChar c;
2703 UWord line;
2704 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2705 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2706 UWord tag = a & tagmask;
2707 tl_assert(setno < OC_N_SETS);
2709 /* we already tried line == 0; skip therefore. */
2710 for (line = 1; line < OC_LINES_PER_SET; line++) {
2711 if (ocacheL1->set[setno].line[line].tag == tag) {
2712 if (line == 1) {
2713 stats_ocacheL1_found_at_1++;
2714 } else {
2715 stats_ocacheL1_found_at_N++;
2717 if (UNLIKELY(0 == (ocacheL1_event_ctr++
2718 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2719 moveLineForwards( &ocacheL1->set[setno], line );
2720 line--;
2722 return &ocacheL1->set[setno].line[line];
2726 /* A miss. Use the last slot. Implicitly this means we're
2727 ejecting the line in the last slot. */
2728 stats_ocacheL1_misses++;
2729 tl_assert(line == OC_LINES_PER_SET);
2730 line--;
2731 tl_assert(line > 0);
2733 /* First, move the to-be-ejected line to the L2 cache. */
2734 victim = &ocacheL1->set[setno].line[line];
2735 c = classify_OCacheLine(victim);
2736 switch (c) {
2737 case 'e':
2738 /* the line is empty (has invalid tag); ignore it. */
2739 break;
2740 case 'z':
2741 /* line contains zeroes. We must ensure the backing store is
2742 updated accordingly, either by copying the line there
2743 verbatim, or by ensuring it isn't present there. We
2744 choose the latter on the basis that it reduces the size of
2745 the backing store. */
2746 ocacheL2_del_tag( victim->tag );
2747 break;
2748 case 'n':
2749 /* line contains at least one real, useful origin. Copy it
2750 to the backing store. */
2751 stats_ocacheL1_lossage++;
2752 inL2 = ocacheL2_find_tag( victim->tag );
2753 if (inL2) {
2754 *inL2 = *victim;
2755 } else {
2756 ocacheL2_add_line( victim );
2758 break;
2759 default:
2760 tl_assert(0);
2763 /* Now we must reload the L1 cache from the backing tree, if
2764 possible. */
2765 tl_assert(tag != victim->tag); /* stay sane */
2766 inL2 = ocacheL2_find_tag( tag );
2767 if (inL2) {
2768 /* We're in luck. It's in the L2. */
2769 ocacheL1->set[setno].line[line] = *inL2;
2770 } else {
2771 /* Missed at both levels of the cache hierarchy. We have to
2772 declare it as full of zeroes (unknown origins). */
2773 stats__ocacheL2_misses++;
2774 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2777 /* Move it one forwards */
2778 moveLineForwards( &ocacheL1->set[setno], line );
2779 line--;
2781 return &ocacheL1->set[setno].line[line];
2784 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2786 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2787 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2788 UWord tag = a & tagmask;
2790 stats_ocacheL1_find++;
2792 if (OC_ENABLE_ASSERTIONS) {
2793 tl_assert(setno >= 0 && setno < OC_N_SETS);
2794 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2797 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2798 return &ocacheL1->set[setno].line[0];
2801 return find_OCacheLine_SLOW( a );
2804 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2806 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2807 //// Set the origins for a+0 .. a+7
2808 { OCacheLine* line;
2809 UWord lineoff = oc_line_offset(a);
2810 if (OC_ENABLE_ASSERTIONS) {
2811 tl_assert(lineoff >= 0
2812 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2814 line = find_OCacheLine( a );
2815 line->u.main.descr[lineoff+0] = 0xF;
2816 line->u.main.descr[lineoff+1] = 0xF;
2817 line->u.main.w32[lineoff+0] = otag;
2818 line->u.main.w32[lineoff+1] = otag;
2820 //// END inlined, specialised version of MC_(helperc_b_store8)
2824 /*------------------------------------------------------------*/
2825 /*--- Aligned fast case permission setters, ---*/
2826 /*--- for dealing with stacks ---*/
2827 /*------------------------------------------------------------*/
2829 /*--------------------- 32-bit ---------------------*/
2831 /* Nb: by "aligned" here we mean 4-byte aligned */
2833 static INLINE void make_aligned_word32_undefined ( Addr a )
2835 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);
2837 #ifndef PERF_FAST_STACK2
2838 make_mem_undefined(a, 4);
2839 #else
2841 UWord sm_off;
2842 SecMap* sm;
2844 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2845 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
2846 make_mem_undefined(a, 4);
2847 return;
2850 sm = get_secmap_for_writing_low(a);
2851 sm_off = SM_OFF(a);
2852 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2854 #endif
2857 static INLINE
2858 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2860 make_aligned_word32_undefined(a);
2861 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2862 //// Set the origins for a+0 .. a+3
2863 { OCacheLine* line;
2864 UWord lineoff = oc_line_offset(a);
2865 if (OC_ENABLE_ASSERTIONS) {
2866 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2868 line = find_OCacheLine( a );
2869 line->u.main.descr[lineoff] = 0xF;
2870 line->u.main.w32[lineoff] = otag;
2872 //// END inlined, specialised version of MC_(helperc_b_store4)
2875 static INLINE
2876 void make_aligned_word32_noaccess ( Addr a )
2878 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);
2880 #ifndef PERF_FAST_STACK2
2881 MC_(make_mem_noaccess)(a, 4);
2882 #else
2884 UWord sm_off;
2885 SecMap* sm;
2887 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2888 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
2889 MC_(make_mem_noaccess)(a, 4);
2890 return;
2893 sm = get_secmap_for_writing_low(a);
2894 sm_off = SM_OFF(a);
2895 sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2897 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2898 //// Set the origins for a+0 .. a+3.
2899 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2900 OCacheLine* line;
2901 UWord lineoff = oc_line_offset(a);
2902 if (OC_ENABLE_ASSERTIONS) {
2903 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2905 line = find_OCacheLine( a );
2906 line->u.main.descr[lineoff] = 0;
2908 //// END inlined, specialised version of MC_(helperc_b_store4)
2910 #endif
2913 /*--------------------- 64-bit ---------------------*/
2915 /* Nb: by "aligned" here we mean 8-byte aligned */
2917 static INLINE void make_aligned_word64_undefined ( Addr a )
2919 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);
2921 #ifndef PERF_FAST_STACK2
2922 make_mem_undefined(a, 8);
2923 #else
2925 UWord sm_off16;
2926 SecMap* sm;
2928 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2929 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
2930 make_mem_undefined(a, 8);
2931 return;
2934 sm = get_secmap_for_writing_low(a);
2935 sm_off16 = SM_OFF_16(a);
2936 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
2938 #endif
2941 static INLINE
2942 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2944 make_aligned_word64_undefined(a);
2945 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2946 //// Set the origins for a+0 .. a+7
2947 { OCacheLine* line;
2948 UWord lineoff = oc_line_offset(a);
2949 tl_assert(lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2950 line = find_OCacheLine( a );
2951 line->u.main.descr[lineoff+0] = 0xF;
2952 line->u.main.descr[lineoff+1] = 0xF;
2953 line->u.main.w32[lineoff+0] = otag;
2954 line->u.main.w32[lineoff+1] = otag;
2956 //// END inlined, specialised version of MC_(helperc_b_store8)
2959 static INLINE
2960 void make_aligned_word64_noaccess ( Addr a )
2962 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);
2964 #ifndef PERF_FAST_STACK2
2965 MC_(make_mem_noaccess)(a, 8);
2966 #else
2968 UWord sm_off16;
2969 SecMap* sm;
2971 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2972 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
2973 MC_(make_mem_noaccess)(a, 8);
2974 return;
2977 sm = get_secmap_for_writing_low(a);
2978 sm_off16 = SM_OFF_16(a);
2979 sm->vabits16[sm_off16] = VA_BITS16_NOACCESS;
2981 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2982 //// Clear the origins for a+0 .. a+7.
2983 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2984 OCacheLine* line;
2985 UWord lineoff = oc_line_offset(a);
2986 tl_assert(lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2987 line = find_OCacheLine( a );
2988 line->u.main.descr[lineoff+0] = 0;
2989 line->u.main.descr[lineoff+1] = 0;
2991 //// END inlined, specialised version of MC_(helperc_b_store8)
2993 #endif
2997 /*------------------------------------------------------------*/
2998 /*--- Stack pointer adjustment ---*/
2999 /*------------------------------------------------------------*/
/* MAYBE_USED prefixes the stack-adjustment handlers below.  It expands
   to nothing when PERF_FAST_STACK is defined, and to the 'unused'
   attribute otherwise. */
#if defined(PERF_FAST_STACK)
#  define MAYBE_USED
#else
#  define MAYBE_USED __attribute__((unused))
#endif
3007 /*--------------- adjustment by 4 bytes ---------------*/
3009 MAYBE_USED
3010 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
3012 UInt otag = ecu | MC_OKIND_STACK;
3013 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
3014 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3015 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3016 } else {
3017 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
3021 MAYBE_USED
3022 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
3024 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
3025 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3026 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3027 } else {
3028 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
3032 MAYBE_USED
3033 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
3035 PROF_EVENT(MCPE_DIE_MEM_STACK_4);
3036 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3037 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3038 } else {
3039 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
3043 /*--------------- adjustment by 8 bytes ---------------*/
3045 MAYBE_USED
3046 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
3048 UInt otag = ecu | MC_OKIND_STACK;
3049 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
3050 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3051 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3052 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3053 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3054 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
3055 } else {
3056 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
3060 MAYBE_USED
3061 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
3063 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
3064 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3065 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3066 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3067 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3068 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3069 } else {
3070 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
3074 MAYBE_USED
3075 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
3077 PROF_EVENT(MCPE_DIE_MEM_STACK_8);
3078 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3079 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3080 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3081 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3082 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3083 } else {
3084 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
3088 /*--------------- adjustment by 12 bytes ---------------*/
3090 MAYBE_USED
3091 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
3093 UInt otag = ecu | MC_OKIND_STACK;
3094 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
3095 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3096 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3097 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3098 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3099 /* from previous test we don't have 8-alignment at offset +0,
3100 hence must have 8 alignment at offsets +4/-4. Hence safe to
3101 do 4 at +0 and then 8 at +4/. */
3102 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3103 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
3104 } else {
3105 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
3109 MAYBE_USED
3110 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
3112 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
3113 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3114 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3115 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3116 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3117 /* from previous test we don't have 8-alignment at offset +0,
3118 hence must have 8 alignment at offsets +4/-4. Hence safe to
3119 do 4 at +0 and then 8 at +4/. */
3120 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3121 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3122 } else {
3123 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
3127 MAYBE_USED
3128 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
3130 PROF_EVENT(MCPE_DIE_MEM_STACK_12);
3131 /* Note the -12 in the test */
3132 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
3133 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
3134 -4. */
3135 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3136 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3137 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3138 /* We have 4-alignment at +0, but we don't have 8-alignment at
3139 -12. So we must have 8-alignment at -8. Hence do 4 at -12
3140 and then 8 at -8. */
3141 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3142 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3143 } else {
3144 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
3148 /*--------------- adjustment by 16 bytes ---------------*/
3150 MAYBE_USED
3151 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
3153 UInt otag = ecu | MC_OKIND_STACK;
3154 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
3155 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3156 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3157 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3158 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3159 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3160 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3161 Hence do 4 at +0, 8 at +4, 4 at +12. */
3162 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3163 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3164 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3165 } else {
3166 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
3170 MAYBE_USED
3171 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
3173 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
3174 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3175 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3176 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3177 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3178 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3179 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3180 Hence do 4 at +0, 8 at +4, 4 at +12. */
3181 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3182 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3183 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3184 } else {
3185 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
3189 MAYBE_USED
3190 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
3192 PROF_EVENT(MCPE_DIE_MEM_STACK_16);
3193 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3194 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
3195 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3196 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3197 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3198 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
3199 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3200 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3201 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3202 } else {
3203 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
3207 /*--------------- adjustment by 32 bytes ---------------*/
3209 MAYBE_USED
3210 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
3212 UInt otag = ecu | MC_OKIND_STACK;
3213 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3214 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3215 /* Straightforward */
3216 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3217 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3218 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3219 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3220 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3221 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3222 +0,+28. */
3223 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3224 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3225 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3226 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
3227 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
3228 } else {
3229 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
3233 MAYBE_USED
3234 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
3236 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3237 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3238 /* Straightforward */
3239 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3240 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3241 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3242 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3243 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3244 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3245 +0,+28. */
3246 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3247 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3248 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3249 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3250 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3251 } else {
3252 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3256 MAYBE_USED
3257 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3259 PROF_EVENT(MCPE_DIE_MEM_STACK_32);
3260 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3261 /* Straightforward */
3262 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3263 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3264 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3265 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3266 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3267 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
3268 4 at -32,-4. */
3269 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3270 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3271 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3272 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3273 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3274 } else {
3275 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3279 /*--------------- adjustment by 112 bytes ---------------*/
3281 MAYBE_USED
3282 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3284 UInt otag = ecu | MC_OKIND_STACK;
3285 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3286 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3287 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3288 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3289 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3290 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3291 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3292 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3293 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3294 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3295 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3296 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3297 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3298 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3299 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3300 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3301 } else {
3302 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3306 MAYBE_USED
3307 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3309 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3310 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3311 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3312 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3313 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3314 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3315 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3316 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3317 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3318 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3319 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3320 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3321 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3322 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3323 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3324 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3325 } else {
3326 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3330 MAYBE_USED
3331 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3333 PROF_EVENT(MCPE_DIE_MEM_STACK_112);
3334 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3335 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3336 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3337 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3338 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3339 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3340 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3341 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3342 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3343 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3344 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3345 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3346 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3347 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3348 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3349 } else {
3350 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3354 /*--------------- adjustment by 128 bytes ---------------*/
3356 MAYBE_USED
3357 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3359 UInt otag = ecu | MC_OKIND_STACK;
3360 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3361 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3362 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3363 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3364 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3365 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3366 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3367 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3368 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3369 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3370 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3371 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3372 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3373 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3374 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3375 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3376 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3377 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3378 } else {
3379 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3383 MAYBE_USED
3384 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3386 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3387 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3388 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3389 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3390 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3391 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3392 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3393 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3394 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3395 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3396 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3397 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3398 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3399 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3400 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3401 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3402 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3403 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3404 } else {
3405 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3409 MAYBE_USED
3410 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3412 PROF_EVENT(MCPE_DIE_MEM_STACK_128);
3413 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3414 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3415 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3416 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3417 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3418 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3419 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3420 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3421 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3422 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3423 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3424 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3425 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3426 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3427 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3428 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3429 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3430 } else {
3431 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3435 /*--------------- adjustment by 144 bytes ---------------*/
3437 MAYBE_USED
3438 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3440 UInt otag = ecu | MC_OKIND_STACK;
3441 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3442 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3443 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3444 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3445 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3446 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3447 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3448 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3449 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3450 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3451 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3452 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3453 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3454 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3455 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3456 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3457 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3458 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3459 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3460 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3461 } else {
3462 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3466 MAYBE_USED
3467 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3469 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3470 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3471 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3472 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3473 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3474 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3475 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3476 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3477 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3478 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3479 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3480 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3481 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3482 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3483 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3484 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3485 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3486 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3487 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3488 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3489 } else {
3490 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3494 MAYBE_USED
3495 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3497 PROF_EVENT(MCPE_DIE_MEM_STACK_144);
3498 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3499 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3500 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3501 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3502 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3503 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3504 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3505 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3506 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3507 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3508 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3509 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3510 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3511 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3512 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3513 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3514 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3515 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3516 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3517 } else {
3518 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3522 /*--------------- adjustment by 160 bytes ---------------*/
3524 MAYBE_USED
3525 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3527 UInt otag = ecu | MC_OKIND_STACK;
3528 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3529 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3530 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3531 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3532 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3533 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3534 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3535 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3536 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3537 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3538 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3539 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3540 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3541 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3542 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3543 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3544 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3545 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3546 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3547 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3548 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3549 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3550 } else {
3551 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3555 MAYBE_USED
3556 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3558 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3559 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3560 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3561 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3562 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3563 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3564 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3565 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3566 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3567 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3568 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3569 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3570 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3571 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3572 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3573 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3574 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3575 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3576 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3577 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3578 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3579 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3580 } else {
3581 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3585 MAYBE_USED
3586 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3588 PROF_EVENT(MCPE_DIE_MEM_STACK_160);
3589 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3590 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3591 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3592 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3593 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3594 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3595 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3596 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3597 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3598 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3599 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3600 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3601 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3602 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3603 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3604 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3605 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3606 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3607 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3608 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3609 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3610 } else {
3611 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3615 /*--------------- adjustment by N bytes ---------------*/
3617 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3619 UInt otag = ecu | MC_OKIND_STACK;
3620 PROF_EVENT(MCPE_NEW_MEM_STACK);
3621 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3624 static void mc_new_mem_stack ( Addr a, SizeT len )
3626 PROF_EVENT(MCPE_NEW_MEM_STACK);
3627 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3630 static void mc_die_mem_stack ( Addr a, SizeT len )
3632 PROF_EVENT(MCPE_DIE_MEM_STACK);
3633 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
/* The AMD64 ABI says:

   "The 128-byte area beyond the location pointed to by %rsp is considered
    to be reserved and shall not be modified by signal or interrupt
    handlers.  Therefore, functions may use this area for temporary data
    that is not needed across function calls.  In particular, leaf functions
    may use this area for their entire stack frame, rather than adjusting
    the stack pointer in the prologue and epilogue.  This area is known as
    red zone [sic]."

   So after any call or return we need to mark this redzone as containing
   undefined values.

   Consider this: we're in function f.  f calls g.  g moves rsp down
   modestly (say 16 bytes) and writes stuff all over the red zone, making it
   defined.  g returns.  f is buggy and reads from parts of the red zone
   that it didn't write on.  But because g filled that area in, f is going
   to be picking up defined V bits and so any errors from reading bits of
   the red zone it didn't write, will be missed.  The only solution I could
   think of was to make the red zone undefined when g returns to f.

   This is in accordance with the ABI, which makes it clear the redzone
   is volatile across function calls.

   The problem occurs the other way round too: f could fill the RZ up
   with defined values and g could mistakenly read them.  So the RZ
   also needs to be nuked on function calls.
*/
3667 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3668 improved so as to have a lower miss rate. */
3670 static UWord stats__nia_cache_queries = 0;
3671 static UWord stats__nia_cache_misses = 0;
3673 typedef
3674 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
3675 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3676 WCacheEnt;
3678 #define N_NIA_TO_ECU_CACHE 511
3680 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3682 static void init_nia_to_ecu_cache ( void )
3684 UWord i;
3685 Addr zero_addr = 0;
3686 ExeContext* zero_ec;
3687 UInt zero_ecu;
3688 /* Fill all the slots with an entry for address zero, and the
3689 relevant otags accordingly. Hence the cache is initially filled
3690 with valid data. */
3691 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3692 tl_assert(zero_ec);
3693 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3694 tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3695 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3696 nia_to_ecu_cache[i].nia0 = zero_addr;
3697 nia_to_ecu_cache[i].ecu0 = zero_ecu;
3698 nia_to_ecu_cache[i].nia1 = zero_addr;
3699 nia_to_ecu_cache[i].ecu1 = zero_ecu;
3703 static inline UInt convert_nia_to_ecu ( Addr nia )
3705 UWord i;
3706 UInt ecu;
3707 ExeContext* ec;
3709 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3711 stats__nia_cache_queries++;
3712 i = nia % N_NIA_TO_ECU_CACHE;
3713 tl_assert(i < N_NIA_TO_ECU_CACHE);
3715 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3716 return nia_to_ecu_cache[i].ecu0;
3718 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3719 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3720 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3721 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3722 # undef SWAP
3723 return nia_to_ecu_cache[i].ecu0;
3726 stats__nia_cache_misses++;
3727 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3728 tl_assert(ec);
3729 ecu = VG_(get_ECU_from_ExeContext)(ec);
3730 tl_assert(VG_(is_plausible_ECU)(ecu));
3732 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3733 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3735 nia_to_ecu_cache[i].nia0 = nia;
3736 nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3737 return ecu;
3741 /* This marks the stack as addressible but undefined, after a call or
3742 return for a target that has an ABI defined stack redzone. It
3743 happens quite a lot and needs to be fast. This is the version for
3744 origin tracking. The non-origin-tracking version is below. */
3745 VG_REGPARM(3)
3746 void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
3748 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
3749 if (0)
3750 VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
3751 base, len, nia );
3753 UInt ecu = convert_nia_to_ecu ( nia );
3754 tl_assert(VG_(is_plausible_ECU)(ecu));
3756 UInt otag = ecu | MC_OKIND_STACK;
3758 # if 0
3759 /* Slow(ish) version, which is fairly easily seen to be correct.
3761 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3762 make_aligned_word64_undefined_w_otag(base + 0, otag);
3763 make_aligned_word64_undefined_w_otag(base + 8, otag);
3764 make_aligned_word64_undefined_w_otag(base + 16, otag);
3765 make_aligned_word64_undefined_w_otag(base + 24, otag);
3767 make_aligned_word64_undefined_w_otag(base + 32, otag);
3768 make_aligned_word64_undefined_w_otag(base + 40, otag);
3769 make_aligned_word64_undefined_w_otag(base + 48, otag);
3770 make_aligned_word64_undefined_w_otag(base + 56, otag);
3772 make_aligned_word64_undefined_w_otag(base + 64, otag);
3773 make_aligned_word64_undefined_w_otag(base + 72, otag);
3774 make_aligned_word64_undefined_w_otag(base + 80, otag);
3775 make_aligned_word64_undefined_w_otag(base + 88, otag);
3777 make_aligned_word64_undefined_w_otag(base + 96, otag);
3778 make_aligned_word64_undefined_w_otag(base + 104, otag);
3779 make_aligned_word64_undefined_w_otag(base + 112, otag);
3780 make_aligned_word64_undefined_w_otag(base + 120, otag);
3781 } else {
3782 MC_(make_mem_undefined_w_otag)(base, len, otag);
3784 # endif
3786 /* Idea is: go fast when
3787 * 8-aligned and length is 128
3788 * the sm is available in the main primary map
3789 * the address range falls entirely with a single secondary map
3790 If all those conditions hold, just update the V+A bits by writing
3791 directly into the vabits array. (If the sm was distinguished, this
3792 will make a copy and then write to it.)
3794 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3795 /* Now we know the address range is suitably sized and aligned. */
3796 UWord a_lo = (UWord)(base);
3797 UWord a_hi = (UWord)(base + 128 - 1);
3798 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3799 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3800 /* Now we know the entire range is within the main primary map. */
3801 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3802 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3803 if (LIKELY(pm_off_lo == pm_off_hi)) {
3804 /* Now we know that the entire address range falls within a
3805 single secondary map, and that that secondary 'lives' in
3806 the main primary map. */
3807 SecMap* sm = get_secmap_for_writing_low(a_lo);
3808 UWord v_off16 = SM_OFF_16(a_lo);
3809 UShort* p = &sm->vabits16[v_off16];
3810 p[ 0] = VA_BITS16_UNDEFINED;
3811 p[ 1] = VA_BITS16_UNDEFINED;
3812 p[ 2] = VA_BITS16_UNDEFINED;
3813 p[ 3] = VA_BITS16_UNDEFINED;
3814 p[ 4] = VA_BITS16_UNDEFINED;
3815 p[ 5] = VA_BITS16_UNDEFINED;
3816 p[ 6] = VA_BITS16_UNDEFINED;
3817 p[ 7] = VA_BITS16_UNDEFINED;
3818 p[ 8] = VA_BITS16_UNDEFINED;
3819 p[ 9] = VA_BITS16_UNDEFINED;
3820 p[10] = VA_BITS16_UNDEFINED;
3821 p[11] = VA_BITS16_UNDEFINED;
3822 p[12] = VA_BITS16_UNDEFINED;
3823 p[13] = VA_BITS16_UNDEFINED;
3824 p[14] = VA_BITS16_UNDEFINED;
3825 p[15] = VA_BITS16_UNDEFINED;
3826 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3827 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3828 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3829 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3830 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3831 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3832 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3833 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3834 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3835 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3836 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3837 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3838 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3839 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3840 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3841 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3842 return;
3847 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3848 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3849 /* Now we know the address range is suitably sized and aligned. */
3850 UWord a_lo = (UWord)(base);
3851 UWord a_hi = (UWord)(base + 288 - 1);
3852 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3853 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3854 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3855 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3856 if (LIKELY(pm_off_lo == pm_off_hi)) {
3857 /* Now we know that the entire address range falls within a
3858 single secondary map, and that that secondary 'lives' in
3859 the main primary map. */
3860 SecMap* sm = get_secmap_for_writing_low(a_lo);
3861 UWord v_off16 = SM_OFF_16(a_lo);
3862 UShort* p = &sm->vabits16[v_off16];
3863 p[ 0] = VA_BITS16_UNDEFINED;
3864 p[ 1] = VA_BITS16_UNDEFINED;
3865 p[ 2] = VA_BITS16_UNDEFINED;
3866 p[ 3] = VA_BITS16_UNDEFINED;
3867 p[ 4] = VA_BITS16_UNDEFINED;
3868 p[ 5] = VA_BITS16_UNDEFINED;
3869 p[ 6] = VA_BITS16_UNDEFINED;
3870 p[ 7] = VA_BITS16_UNDEFINED;
3871 p[ 8] = VA_BITS16_UNDEFINED;
3872 p[ 9] = VA_BITS16_UNDEFINED;
3873 p[10] = VA_BITS16_UNDEFINED;
3874 p[11] = VA_BITS16_UNDEFINED;
3875 p[12] = VA_BITS16_UNDEFINED;
3876 p[13] = VA_BITS16_UNDEFINED;
3877 p[14] = VA_BITS16_UNDEFINED;
3878 p[15] = VA_BITS16_UNDEFINED;
3879 p[16] = VA_BITS16_UNDEFINED;
3880 p[17] = VA_BITS16_UNDEFINED;
3881 p[18] = VA_BITS16_UNDEFINED;
3882 p[19] = VA_BITS16_UNDEFINED;
3883 p[20] = VA_BITS16_UNDEFINED;
3884 p[21] = VA_BITS16_UNDEFINED;
3885 p[22] = VA_BITS16_UNDEFINED;
3886 p[23] = VA_BITS16_UNDEFINED;
3887 p[24] = VA_BITS16_UNDEFINED;
3888 p[25] = VA_BITS16_UNDEFINED;
3889 p[26] = VA_BITS16_UNDEFINED;
3890 p[27] = VA_BITS16_UNDEFINED;
3891 p[28] = VA_BITS16_UNDEFINED;
3892 p[29] = VA_BITS16_UNDEFINED;
3893 p[30] = VA_BITS16_UNDEFINED;
3894 p[31] = VA_BITS16_UNDEFINED;
3895 p[32] = VA_BITS16_UNDEFINED;
3896 p[33] = VA_BITS16_UNDEFINED;
3897 p[34] = VA_BITS16_UNDEFINED;
3898 p[35] = VA_BITS16_UNDEFINED;
3899 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3900 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3901 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3902 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3903 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3904 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3905 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3906 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3907 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3908 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3909 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3910 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3911 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3912 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3913 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3914 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3915 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3916 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3917 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3918 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3919 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3920 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3921 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3922 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3923 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3924 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3925 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3926 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3927 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3928 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3929 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3930 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3931 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3932 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3933 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3934 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3935 return;
3940 /* else fall into slow case */
3941 MC_(make_mem_undefined_w_otag)(base, len, otag);
3945 /* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
3946 specialised for the non-origin-tracking case. */
3947 VG_REGPARM(2)
3948 void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
3950 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
3951 if (0)
3952 VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
3953 base, len );
3955 # if 0
3956 /* Slow(ish) version, which is fairly easily seen to be correct.
3958 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3959 make_aligned_word64_undefined(base + 0);
3960 make_aligned_word64_undefined(base + 8);
3961 make_aligned_word64_undefined(base + 16);
3962 make_aligned_word64_undefined(base + 24);
3964 make_aligned_word64_undefined(base + 32);
3965 make_aligned_word64_undefined(base + 40);
3966 make_aligned_word64_undefined(base + 48);
3967 make_aligned_word64_undefined(base + 56);
3969 make_aligned_word64_undefined(base + 64);
3970 make_aligned_word64_undefined(base + 72);
3971 make_aligned_word64_undefined(base + 80);
3972 make_aligned_word64_undefined(base + 88);
3974 make_aligned_word64_undefined(base + 96);
3975 make_aligned_word64_undefined(base + 104);
3976 make_aligned_word64_undefined(base + 112);
3977 make_aligned_word64_undefined(base + 120);
3978 } else {
3979 make_mem_undefined(base, len);
3981 # endif
3983 /* Idea is: go fast when
3984 * 8-aligned and length is 128
3985 * the sm is available in the main primary map
3986 * the address range falls entirely with a single secondary map
3987 If all those conditions hold, just update the V+A bits by writing
3988 directly into the vabits array. (If the sm was distinguished, this
3989 will make a copy and then write to it.)
3991 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3992 /* Now we know the address range is suitably sized and aligned. */
3993 UWord a_lo = (UWord)(base);
3994 UWord a_hi = (UWord)(base + 128 - 1);
3995 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3996 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3997 /* Now we know the entire range is within the main primary map. */
3998 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3999 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4000 if (LIKELY(pm_off_lo == pm_off_hi)) {
4001 /* Now we know that the entire address range falls within a
4002 single secondary map, and that that secondary 'lives' in
4003 the main primary map. */
4004 SecMap* sm = get_secmap_for_writing_low(a_lo);
4005 UWord v_off16 = SM_OFF_16(a_lo);
4006 UShort* p = &sm->vabits16[v_off16];
4007 p[ 0] = VA_BITS16_UNDEFINED;
4008 p[ 1] = VA_BITS16_UNDEFINED;
4009 p[ 2] = VA_BITS16_UNDEFINED;
4010 p[ 3] = VA_BITS16_UNDEFINED;
4011 p[ 4] = VA_BITS16_UNDEFINED;
4012 p[ 5] = VA_BITS16_UNDEFINED;
4013 p[ 6] = VA_BITS16_UNDEFINED;
4014 p[ 7] = VA_BITS16_UNDEFINED;
4015 p[ 8] = VA_BITS16_UNDEFINED;
4016 p[ 9] = VA_BITS16_UNDEFINED;
4017 p[10] = VA_BITS16_UNDEFINED;
4018 p[11] = VA_BITS16_UNDEFINED;
4019 p[12] = VA_BITS16_UNDEFINED;
4020 p[13] = VA_BITS16_UNDEFINED;
4021 p[14] = VA_BITS16_UNDEFINED;
4022 p[15] = VA_BITS16_UNDEFINED;
4023 return;
4028 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
4029 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
4030 /* Now we know the address range is suitably sized and aligned. */
4031 UWord a_lo = (UWord)(base);
4032 UWord a_hi = (UWord)(base + 288 - 1);
4033 tl_assert(a_lo < a_hi); // paranoia: detect overflow
4034 if (a_hi <= MAX_PRIMARY_ADDRESS) {
4035 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4036 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4037 if (LIKELY(pm_off_lo == pm_off_hi)) {
4038 /* Now we know that the entire address range falls within a
4039 single secondary map, and that that secondary 'lives' in
4040 the main primary map. */
4041 SecMap* sm = get_secmap_for_writing_low(a_lo);
4042 UWord v_off16 = SM_OFF_16(a_lo);
4043 UShort* p = &sm->vabits16[v_off16];
4044 p[ 0] = VA_BITS16_UNDEFINED;
4045 p[ 1] = VA_BITS16_UNDEFINED;
4046 p[ 2] = VA_BITS16_UNDEFINED;
4047 p[ 3] = VA_BITS16_UNDEFINED;
4048 p[ 4] = VA_BITS16_UNDEFINED;
4049 p[ 5] = VA_BITS16_UNDEFINED;
4050 p[ 6] = VA_BITS16_UNDEFINED;
4051 p[ 7] = VA_BITS16_UNDEFINED;
4052 p[ 8] = VA_BITS16_UNDEFINED;
4053 p[ 9] = VA_BITS16_UNDEFINED;
4054 p[10] = VA_BITS16_UNDEFINED;
4055 p[11] = VA_BITS16_UNDEFINED;
4056 p[12] = VA_BITS16_UNDEFINED;
4057 p[13] = VA_BITS16_UNDEFINED;
4058 p[14] = VA_BITS16_UNDEFINED;
4059 p[15] = VA_BITS16_UNDEFINED;
4060 p[16] = VA_BITS16_UNDEFINED;
4061 p[17] = VA_BITS16_UNDEFINED;
4062 p[18] = VA_BITS16_UNDEFINED;
4063 p[19] = VA_BITS16_UNDEFINED;
4064 p[20] = VA_BITS16_UNDEFINED;
4065 p[21] = VA_BITS16_UNDEFINED;
4066 p[22] = VA_BITS16_UNDEFINED;
4067 p[23] = VA_BITS16_UNDEFINED;
4068 p[24] = VA_BITS16_UNDEFINED;
4069 p[25] = VA_BITS16_UNDEFINED;
4070 p[26] = VA_BITS16_UNDEFINED;
4071 p[27] = VA_BITS16_UNDEFINED;
4072 p[28] = VA_BITS16_UNDEFINED;
4073 p[29] = VA_BITS16_UNDEFINED;
4074 p[30] = VA_BITS16_UNDEFINED;
4075 p[31] = VA_BITS16_UNDEFINED;
4076 p[32] = VA_BITS16_UNDEFINED;
4077 p[33] = VA_BITS16_UNDEFINED;
4078 p[34] = VA_BITS16_UNDEFINED;
4079 p[35] = VA_BITS16_UNDEFINED;
4080 return;
4085 /* else fall into slow case */
4086 make_mem_undefined(base, len);
4090 /* And this is an even more specialised case, for the case where there
4091 is no origin tracking, and the length is 128. */
4092 VG_REGPARM(1)
4093 void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
4095 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
4096 if (0)
4097 VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );
4099 # if 0
4100 /* Slow(ish) version, which is fairly easily seen to be correct.
4102 if (LIKELY( VG_IS_8_ALIGNED(base) )) {
4103 make_aligned_word64_undefined(base + 0);
4104 make_aligned_word64_undefined(base + 8);
4105 make_aligned_word64_undefined(base + 16);
4106 make_aligned_word64_undefined(base + 24);
4108 make_aligned_word64_undefined(base + 32);
4109 make_aligned_word64_undefined(base + 40);
4110 make_aligned_word64_undefined(base + 48);
4111 make_aligned_word64_undefined(base + 56);
4113 make_aligned_word64_undefined(base + 64);
4114 make_aligned_word64_undefined(base + 72);
4115 make_aligned_word64_undefined(base + 80);
4116 make_aligned_word64_undefined(base + 88);
4118 make_aligned_word64_undefined(base + 96);
4119 make_aligned_word64_undefined(base + 104);
4120 make_aligned_word64_undefined(base + 112);
4121 make_aligned_word64_undefined(base + 120);
4122 } else {
4123 make_mem_undefined(base, 128);
4125 # endif
4127 /* Idea is: go fast when
4128 * 16-aligned and length is 128
4129 * the sm is available in the main primary map
4130 * the address range falls entirely within a single secondary map
4131 If all those conditions hold, just update the V+A bits by writing
4132 directly into the vabits array. (If the sm was distinguished, this
4133 will make a copy and then write to it.)
4135 Typically this applies to amd64 'ret' instructions, since RSP is
4136 16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
4138 if (LIKELY( VG_IS_16_ALIGNED(base) )) {
4139 /* Now we know the address range is suitably sized and aligned. */
4140 UWord a_lo = (UWord)(base);
4141 UWord a_hi = (UWord)(base + 128 - 1);
4142 /* FIXME: come up with a sane story on the wraparound case
4143 (which of course cannot happen, but still..) */
4144 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
4145 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
4146 /* Now we know the entire range is within the main primary map. */
4147 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4148 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4149 if (LIKELY(pm_off_lo == pm_off_hi)) {
4150 /* Now we know that the entire address range falls within a
4151 single secondary map, and that that secondary 'lives' in
4152 the main primary map. */
4153 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
4154 SecMap* sm = get_secmap_for_writing_low(a_lo);
4155 UWord v_off = SM_OFF(a_lo);
4156 UInt* w32 = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]);
4157 w32[ 0] = VA_BITS32_UNDEFINED;
4158 w32[ 1] = VA_BITS32_UNDEFINED;
4159 w32[ 2] = VA_BITS32_UNDEFINED;
4160 w32[ 3] = VA_BITS32_UNDEFINED;
4161 w32[ 4] = VA_BITS32_UNDEFINED;
4162 w32[ 5] = VA_BITS32_UNDEFINED;
4163 w32[ 6] = VA_BITS32_UNDEFINED;
4164 w32[ 7] = VA_BITS32_UNDEFINED;
4165 return;
   /* The same, but for when base is 8 % 16, which is the situation
      with RSP for amd64-ELF immediately after call instructions.
   */
4173 if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
4174 /* Now we know the address range is suitably sized and aligned. */
4175 UWord a_lo = (UWord)(base);
4176 UWord a_hi = (UWord)(base + 128 - 1);
      /* FIXME: come up with a sane story on the wraparound case
         (which of course cannot happen, but still..) */
4179 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
4180 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
4181 /* Now we know the entire range is within the main primary map. */
4182 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4183 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4184 if (LIKELY(pm_off_lo == pm_off_hi)) {
4185 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
4186 /* Now we know that the entire address range falls within a
4187 single secondary map, and that that secondary 'lives' in
4188 the main primary map. */
4189 SecMap* sm = get_secmap_for_writing_low(a_lo);
4190 UWord v_off16 = SM_OFF_16(a_lo);
4191 UShort* w16 = &sm->vabits16[v_off16];
4192 UInt* w32 = ASSUME_ALIGNED(UInt*, &w16[1]);
4193 /* The following assertion is commented out for obvious
4194 performance reasons, but was verified as valid when
4195 running the entire testsuite and also Firefox. */
4196 /* tl_assert(VG_IS_4_ALIGNED(w32)); */
4197 w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
4198 w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
4199 w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
4200 w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
4201 w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
4202 w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
4203 w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
4204 w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
4205 w16[15] = VA_BITS16_UNDEFINED; // w16[15]
4206 return;
4211 /* else fall into slow case */
4212 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
4213 make_mem_undefined(base, 128);
4217 /*------------------------------------------------------------*/
4218 /*--- Checking memory ---*/
4219 /*------------------------------------------------------------*/
/* Outcome of a definedness check over a range of memory. */
typedef enum {
   MC_Ok       = 5,   /* nothing to report */
   MC_AddrErr  = 6,   /* a byte in the range is not addressable */
   MC_ValueErr = 7    /* a byte is addressable but not fully defined */
} MC_ReadResult;
4230 /* Check permissions for address range. If inadequate permissions
4231 exist, *bad_addr is set to the offending address, so the caller can
4232 know what it is. */
4234 /* Returns True if [a .. a+len) is not addressible. Otherwise,
4235 returns False, and if bad_addr is non-NULL, sets *bad_addr to
4236 indicate the lowest failing address. Functions below are
4237 similar. */
4238 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
4240 SizeT i;
4241 UWord vabits2;
4243 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
4244 for (i = 0; i < len; i++) {
4245 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
4246 vabits2 = get_vabits2(a);
4247 if (VA_BITS2_NOACCESS != vabits2) {
4248 if (bad_addr != NULL) *bad_addr = a;
4249 return False;
4251 a++;
4253 return True;
4256 static Bool is_mem_addressable ( Addr a, SizeT len,
4257 /*OUT*/Addr* bad_addr )
4259 SizeT i;
4260 UWord vabits2;
4262 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
4263 for (i = 0; i < len; i++) {
4264 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
4265 vabits2 = get_vabits2(a);
4266 if (VA_BITS2_NOACCESS == vabits2) {
4267 if (bad_addr != NULL) *bad_addr = a;
4268 return False;
4270 a++;
4272 return True;
4275 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
4276 /*OUT*/Addr* bad_addr,
4277 /*OUT*/UInt* otag )
4279 SizeT i;
4280 UWord vabits2;
4282 PROF_EVENT(MCPE_IS_MEM_DEFINED);
4283 DEBUG("is_mem_defined\n");
4285 if (otag) *otag = 0;
4286 if (bad_addr) *bad_addr = 0;
4287 for (i = 0; i < len; i++) {
4288 PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
4289 vabits2 = get_vabits2(a);
4290 if (VA_BITS2_DEFINED != vabits2) {
4291 // Error! Nb: Report addressability errors in preference to
4292 // definedness errors. And don't report definedeness errors unless
4293 // --undef-value-errors=yes.
4294 if (bad_addr) {
4295 *bad_addr = a;
4297 if (VA_BITS2_NOACCESS == vabits2) {
4298 return MC_AddrErr;
4300 if (MC_(clo_mc_level) >= 2) {
4301 if (otag && MC_(clo_mc_level) == 3) {
4302 *otag = MC_(helperc_b_load1)( a );
4304 return MC_ValueErr;
4307 a++;
4309 return MC_Ok;
4313 /* Like is_mem_defined but doesn't give up at the first uninitialised
4314 byte -- the entire range is always checked. This is important for
4315 detecting errors in the case where a checked range strays into
4316 invalid memory, but that fact is not detected by the ordinary
4317 is_mem_defined(), because of an undefined section that precedes the
4318 out of range section, possibly as a result of an alignment hole in
4319 the checked data. This version always checks the entire range and
4320 can report both a definedness and an accessbility error, if
4321 necessary. */
4322 static void is_mem_defined_comprehensive (
4323 Addr a, SizeT len,
4324 /*OUT*/Bool* errorV, /* is there a definedness err? */
4325 /*OUT*/Addr* bad_addrV, /* if so where? */
4326 /*OUT*/UInt* otagV, /* and what's its otag? */
4327 /*OUT*/Bool* errorA, /* is there an addressability err? */
4328 /*OUT*/Addr* bad_addrA /* if so where? */
4331 SizeT i;
4332 UWord vabits2;
4333 Bool already_saw_errV = False;
4335 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
4336 DEBUG("is_mem_defined_comprehensive\n");
4338 tl_assert(!(*errorV || *errorA));
4340 for (i = 0; i < len; i++) {
4341 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
4342 vabits2 = get_vabits2(a);
4343 switch (vabits2) {
4344 case VA_BITS2_DEFINED:
4345 a++;
4346 break;
4347 case VA_BITS2_UNDEFINED:
4348 case VA_BITS2_PARTDEFINED:
4349 if (!already_saw_errV) {
4350 *errorV = True;
4351 *bad_addrV = a;
4352 if (MC_(clo_mc_level) == 3) {
4353 *otagV = MC_(helperc_b_load1)( a );
4354 } else {
4355 *otagV = 0;
4357 already_saw_errV = True;
4359 a++; /* keep going */
4360 break;
4361 case VA_BITS2_NOACCESS:
4362 *errorA = True;
4363 *bad_addrA = a;
4364 return; /* give up now. */
4365 default:
4366 tl_assert(0);
4372 /* Check a zero-terminated ascii string. Tricky -- don't want to
4373 examine the actual bytes, to find the end, until we're sure it is
4374 safe to do so. */
4376 static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
4378 UWord vabits2;
4380 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
4381 DEBUG("mc_is_defined_asciiz\n");
4383 if (otag) *otag = 0;
4384 if (bad_addr) *bad_addr = 0;
4385 while (True) {
4386 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
4387 vabits2 = get_vabits2(a);
4388 if (VA_BITS2_DEFINED != vabits2) {
4389 // Error! Nb: Report addressability errors in preference to
4390 // definedness errors. And don't report definedeness errors unless
4391 // --undef-value-errors=yes.
4392 if (bad_addr) {
4393 *bad_addr = a;
4395 if (VA_BITS2_NOACCESS == vabits2) {
4396 return MC_AddrErr;
4398 if (MC_(clo_mc_level) >= 2) {
4399 if (otag && MC_(clo_mc_level) == 3) {
4400 *otag = MC_(helperc_b_load1)( a );
4402 return MC_ValueErr;
4405 /* Ok, a is safe to read. */
4406 if (* ((UChar*)a) == 0) {
4407 return MC_Ok;
4409 a++;
4414 /*------------------------------------------------------------*/
4415 /*--- Memory event handlers ---*/
4416 /*------------------------------------------------------------*/
4418 static
4419 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
4420 Addr base, SizeT size )
4422 Addr bad_addr;
4423 Bool ok = is_mem_addressable ( base, size, &bad_addr );
4425 if (!ok) {
4426 switch (part) {
4427 case Vg_CoreSysCall:
4428 MC_(record_memparam_error) ( tid, bad_addr,
4429 /*isAddrErr*/True, s, 0/*otag*/ );
4430 break;
4432 case Vg_CoreSignal:
4433 MC_(record_core_mem_error)( tid, s );
4434 break;
4436 default:
4437 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
4442 static
4443 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
4444 Addr base, SizeT size )
4446 UInt otag = 0;
4447 Addr bad_addr;
4448 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
4450 if (MC_Ok != res) {
4451 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4453 switch (part) {
4454 case Vg_CoreSysCall:
4455 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4456 isAddrErr ? 0 : otag );
4457 break;
4459 case Vg_CoreSysCallArgInMem:
4460 MC_(record_regparam_error) ( tid, s, otag );
4461 break;
4463 /* If we're being asked to jump to a silly address, record an error
4464 message before potentially crashing the entire system. */
4465 case Vg_CoreTranslate:
4466 MC_(record_jump_error)( tid, bad_addr );
4467 break;
4469 default:
4470 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
4475 static
4476 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
4477 const HChar* s, Addr str )
4479 MC_ReadResult res;
4480 Addr bad_addr = 0; // shut GCC up
4481 UInt otag = 0;
4483 tl_assert(part == Vg_CoreSysCall);
4484 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
4485 if (MC_Ok != res) {
4486 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4487 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4488 isAddrErr ? 0 : otag );
4492 /* Handling of mmap and mprotect is not as simple as it seems.
4494 The underlying semantics are that memory obtained from mmap is
4495 always initialised, but may be inaccessible. And changes to the
4496 protection of memory do not change its contents and hence not its
4497 definedness state. Problem is we can't model
4498 inaccessible-but-with-some-definedness state; once we mark memory
4499 as inaccessible we lose all info about definedness, and so can't
4500 restore that if it is later made accessible again.
4502 One obvious thing to do is this:
4504 mmap/mprotect NONE -> noaccess
4505 mmap/mprotect other -> defined
4507 The problem case here is: taking accessible memory, writing
4508 uninitialised data to it, mprotecting it NONE and later mprotecting
4509 it back to some accessible state causes the undefinedness to be
4510 lost.
4512 A better proposal is:
4514 (1) mmap NONE -> make noaccess
4515 (2) mmap other -> make defined
4517 (3) mprotect NONE -> # no change
4518 (4) mprotect other -> change any "noaccess" to "defined"
4520 (2) is OK because memory newly obtained from mmap really is defined
4521 (zeroed out by the kernel -- doing anything else would
4522 constitute a massive security hole.)
4524 (1) is OK because the only way to make the memory usable is via
4525 (4), in which case we also wind up correctly marking it all as
4526 defined.
4528 (3) is the weak case. We choose not to change memory state.
4529 (presumably the range is in some mixture of "defined" and
4530 "undefined", viz, accessible but with arbitrary V bits). Doing
4531 nothing means we retain the V bits, so that if the memory is
4532 later mprotected "other", the V bits remain unchanged, so there
4533 can be no false negatives. The bad effect is that if there's
4534 an access in the area, then MC cannot warn; but at least we'll
4535 get a SEGV to show, so it's better than nothing.
4537 Consider the sequence (3) followed by (4). Any memory that was
4538 "defined" or "undefined" previously retains its state (as
4539 required). Any memory that was "noaccess" before can only have
4540 been made that way by (1), and so it's OK to change it to
4541 "defined".
   See https://bugs.kde.org/show_bug.cgi?id=205541
   and https://bugs.kde.org/show_bug.cgi?id=210268
*/
4546 static
4547 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4548 ULong di_handle )
4550 if (rr || ww || xx) {
4551 /* (2) mmap/mprotect other -> defined */
4552 MC_(make_mem_defined)(a, len);
4553 } else {
4554 /* (1) mmap/mprotect NONE -> noaccess */
4555 MC_(make_mem_noaccess)(a, len);
4559 static
4560 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4562 if (rr || ww || xx) {
4563 /* (4) mprotect other -> change any "noaccess" to "defined" */
4564 make_mem_defined_if_noaccess(a, len);
4565 } else {
4566 /* (3) mprotect NONE -> # no change */
4567 /* do nothing */
4572 static
4573 void mc_new_mem_startup( Addr a, SizeT len,
4574 Bool rr, Bool ww, Bool xx, ULong di_handle )
4576 // Because code is defined, initialised variables get put in the data
4577 // segment and are defined, and uninitialised variables get put in the
4578 // bss segment and are auto-zeroed (and so defined).
4580 // It's possible that there will be padding between global variables.
4581 // This will also be auto-zeroed, and marked as defined by Memcheck. If
4582 // a program uses it, Memcheck will not complain. This is arguably a
4583 // false negative, but it's a grey area -- the behaviour is defined (the
4584 // padding is zeroed) but it's probably not what the user intended. And
4585 // we can't avoid it.
4587 // Note: we generally ignore RWX permissions, because we can't track them
4588 // without requiring more than one A bit which would slow things down a
4589 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
4590 // So we mark any such pages as "unaddressable".
4591 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4592 a, (ULong)len, rr, ww, xx);
4593 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4596 static
4597 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4599 MC_(make_mem_defined)(a, len);
4603 /*------------------------------------------------------------*/
4604 /*--- Register event handlers ---*/
4605 /*------------------------------------------------------------*/
4607 /* Try and get a nonzero origin for the guest state section of thread
4608 tid characterised by (offset,size). Return 0 if nothing to show
4609 for it. */
4610 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4611 Int offset, SizeT size )
4613 Int sh2off;
4614 UInt area[3];
4615 UInt otag;
4616 sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4617 if (sh2off == -1)
4618 return 0; /* This piece of guest state is not tracked */
4619 tl_assert(sh2off >= 0);
4620 tl_assert(0 == (sh2off % 4));
4621 area[0] = 0x31313131;
4622 area[2] = 0x27272727;
4623 VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4624 tl_assert(area[0] == 0x31313131);
4625 tl_assert(area[2] == 0x27272727);
4626 otag = area[1];
4627 return otag;
4631 /* When some chunk of guest state is written, mark the corresponding
4632 shadow area as valid. This is used to initialise arbitrarily large
4633 chunks of guest state, hence the _SIZE value, which has to be as
4634 big as the biggest guest state.
4636 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4637 PtrdiffT offset, SizeT size)
4639 # define MAX_REG_WRITE_SIZE 2264
4640 UChar area[MAX_REG_WRITE_SIZE];
4641 tl_assert(size <= MAX_REG_WRITE_SIZE);
4642 VG_(memset)(area, V_BITS8_DEFINED, size);
4643 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4644 # undef MAX_REG_WRITE_SIZE
4647 static
4648 void mc_post_reg_write_clientcall ( ThreadId tid,
4649 PtrdiffT offset, SizeT size, Addr f)
4651 mc_post_reg_write(/*dummy*/0, tid, offset, size);
4654 /* Look at the definedness of the guest's shadow state for
4655 [offset, offset+len). If any part of that is undefined, record
4656 a parameter error.
4658 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4659 PtrdiffT offset, SizeT size)
4661 Int i;
4662 Bool bad;
4663 UInt otag;
4665 UChar area[16];
4666 tl_assert(size <= 16);
4668 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4670 bad = False;
4671 for (i = 0; i < size; i++) {
4672 if (area[i] != V_BITS8_DEFINED) {
4673 bad = True;
4674 break;
4678 if (!bad)
4679 return;
4681 /* We've found some undefinedness. See if we can also find an
4682 origin for it. */
4683 otag = mb_get_origin_for_guest_offset( tid, offset, size );
4684 MC_(record_regparam_error) ( tid, s, otag );
4688 /*------------------------------------------------------------*/
4689 /*--- Register-memory event handlers ---*/
4690 /*------------------------------------------------------------*/
4692 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
4693 PtrdiffT guest_state_offset, SizeT size )
4695 SizeT i;
4696 UChar vbits8;
4697 Int offset;
4698 UInt d32;
4700 /* Slow loop. */
4701 for (i = 0; i < size; i++) {
4702 get_vbits8( a+i, &vbits8 );
4703 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
4704 1, &vbits8 );
4707 if (MC_(clo_mc_level) != 3)
4708 return;
4710 /* Track origins. */
4711 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4712 if (offset == -1)
4713 return;
4715 switch (size) {
4716 case 1:
4717 d32 = MC_(helperc_b_load1)( a );
4718 break;
4719 case 2:
4720 d32 = MC_(helperc_b_load2)( a );
4721 break;
4722 case 4:
4723 d32 = MC_(helperc_b_load4)( a );
4724 break;
4725 case 8:
4726 d32 = MC_(helperc_b_load8)( a );
4727 break;
4728 case 16:
4729 d32 = MC_(helperc_b_load16)( a );
4730 break;
4731 case 32:
4732 d32 = MC_(helperc_b_load32)( a );
4733 break;
4734 default:
4735 tl_assert(0);
4738 VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
4741 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
4742 PtrdiffT guest_state_offset, Addr a,
4743 SizeT size )
4745 SizeT i;
4746 UChar vbits8;
4747 Int offset;
4748 UInt d32;
4750 /* Slow loop. */
4751 for (i = 0; i < size; i++) {
4752 VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
4753 guest_state_offset+i, 1 );
4754 set_vbits8( a+i, vbits8 );
4757 if (MC_(clo_mc_level) != 3)
4758 return;
4760 /* Track origins. */
4761 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4762 if (offset == -1)
4763 return;
4765 VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
4766 switch (size) {
4767 case 1:
4768 MC_(helperc_b_store1)( a, d32 );
4769 break;
4770 case 2:
4771 MC_(helperc_b_store2)( a, d32 );
4772 break;
4773 case 4:
4774 MC_(helperc_b_store4)( a, d32 );
4775 break;
4776 case 8:
4777 MC_(helperc_b_store8)( a, d32 );
4778 break;
4779 case 16:
4780 MC_(helperc_b_store16)( a, d32 );
4781 break;
4782 case 32:
4783 MC_(helperc_b_store32)( a, d32 );
4784 break;
4785 default:
4786 tl_assert(0);
4791 /*------------------------------------------------------------*/
4792 /*--- Some static assertions ---*/
4793 /*------------------------------------------------------------*/
4795 /* The handwritten assembly helpers below have baked-in assumptions
4796 about various constant values. These assertions attempt to make
4797 that a bit safer by checking those values and flagging changes that
4798 would make the assembly invalid. Not perfect but it's better than
4799 nothing. */
4801 STATIC_ASSERT(SM_CHUNKS * 4 == 65536);
4803 STATIC_ASSERT(VA_BITS8_DEFINED == 0xAA);
4804 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);
4806 STATIC_ASSERT(V_BITS32_DEFINED == 0x00000000);
4807 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);
4809 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA);
4810 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);
4812 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000);
4813 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);
4815 STATIC_ASSERT(VA_BITS2_DEFINED == 2);
4816 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);
4818 STATIC_ASSERT(V_BITS8_DEFINED == 0x00);
4819 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
4822 /*------------------------------------------------------------*/
4823 /*--- Functions called directly from generated code: ---*/
4824 /*--- Load/store handlers. ---*/
4825 /*------------------------------------------------------------*/
/* Types:  LOADV32, LOADV16, LOADV8 are:
               UWord fn ( Addr a )
   so they return 32-bits on 32-bit machines and 64-bits on
   64-bit machines.  Addr has the same size as a host word.

   LOADV64 is always  ULong fn ( Addr a )

   Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   are a UWord, and for STOREV64 they are a ULong.
*/
4838 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4839 naturally '_sz/8'-aligned, or it exceeds the range covered by the
4840 primary map. This is all very tricky (and important!), so let's
4841 work through the maths by hand (below), *and* assert for these
4842 values at startup. */
/* MASK(n): for an access of n bytes, the address bits that must all
   be zero for the address to be n-aligned and inside the range
   covered by the primary map.  The argument is parenthesised so that
   any expression can be passed. */
#define MASK(_szInBytes) \
   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )

/* MASK only exists so as to define this macro.  Nonzero iff '_a' is
   not naturally aligned for an access of '_szInBits' bits, or exceeds
   the range covered by the primary map.  '_szInBits' is parenthesised
   before the shift so that arguments containing operators of lower
   precedence than '>>' (for example a conditional expression) are
   converted to bytes as a whole. */
#define UNALIGNED_OR_HIGH(_a,_szInBits) \
   ((_a) & MASK(((_szInBits) >> 3)))
/* On a 32-bit machine:

   N_PRIMARY_BITS          == 16, so
   N_PRIMARY_MAP           == 0x10000, so
   N_PRIMARY_MAP-1         == 0xFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
           = ~ ( 0xFFFF | 0xFFFF0000 )
           = ~ 0xFFFF'FFFF
           = 0

   MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
           = ~ ( 0xFFFE | 0xFFFF0000 )
           = ~ 0xFFFF'FFFE
           = 1

   MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
           = ~ ( 0xFFFC | 0xFFFF0000 )
           = ~ 0xFFFF'FFFC
           = 3

   MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
           = ~ ( 0xFFF8 | 0xFFFF0000 )
           = ~ 0xFFFF'FFF8
           = 7

   Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   the 1-byte alignment case, it is always a zero value, since MASK(1)
   is zero.  All as expected.

   On a 64-bit machine, it's more complex, since we're testing
   simultaneously for misalignment and for the address being at or
   above 64G:

   N_PRIMARY_BITS          == 20, so
   N_PRIMARY_MAP           == 0x100000, so
   N_PRIMARY_MAP-1         == 0xFFFFF, so
   (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so

   MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFF | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFF
           = 0xFFFF'FFF0'0000'0000

   MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFE | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFE
           = 0xFFFF'FFF0'0000'0001

   MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
           = ~ ( 0xFFFC | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFFC
           = 0xFFFF'FFF0'0000'0003

   MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
           = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
           = ~ 0xF'FFFF'FFF8
           = 0xFFFF'FFF0'0000'0007
*/
4912 /*------------------------------------------------------------*/
4913 /*--- LOADV256 and LOADV128 ---*/
4914 /*------------------------------------------------------------*/
4916 static INLINE
4917 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4918 Addr a, SizeT nBits, Bool isBigEndian )
4920 PROF_EVENT(MCPE_LOADV_128_OR_256);
4922 #ifndef PERF_FAST_LOADV
4923 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4924 return;
4925 #else
4927 UWord sm_off16, vabits16, j;
4928 UWord nBytes = nBits / 8;
4929 UWord nULongs = nBytes / 8;
4930 SecMap* sm;
4932 if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4933 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
4934 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4935 return;
4938 /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4939 suitably aligned, is mapped, and addressible. */
4940 for (j = 0; j < nULongs; j++) {
4941 sm = get_secmap_for_reading_low(a + 8*j);
4942 sm_off16 = SM_OFF_16(a + 8*j);
4943 vabits16 = sm->vabits16[sm_off16];
4945 // Convert V bits from compact memory form to expanded
4946 // register form.
4947 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4948 res[j] = V_BITS64_DEFINED;
4949 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4950 res[j] = V_BITS64_UNDEFINED;
4951 } else {
4952 /* Slow case: some block of 8 bytes are not all-defined or
4953 all-undefined. */
4954 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
4955 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4956 return;
4959 return;
4961 #endif
4964 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4966 mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4968 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4970 mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4973 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4975 mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4977 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4979 mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4982 /*------------------------------------------------------------*/
4983 /*--- LOADV64 ---*/
4984 /*------------------------------------------------------------*/
4986 static INLINE
4987 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4989 PROF_EVENT(MCPE_LOADV64);
4991 #ifndef PERF_FAST_LOADV
4992 return mc_LOADVn_slow( a, 64, isBigEndian );
4993 #else
4995 UWord sm_off16, vabits16;
4996 SecMap* sm;
4998 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4999 PROF_EVENT(MCPE_LOADV64_SLOW1);
5000 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
5003 sm = get_secmap_for_reading_low(a);
5004 sm_off16 = SM_OFF_16(a);
5005 vabits16 = sm->vabits16[sm_off16];
5007 // Handle common case quickly: a is suitably aligned, is mapped, and
5008 // addressible.
5009 // Convert V bits from compact memory form to expanded register form.
5010 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
5011 return V_BITS64_DEFINED;
5012 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
5013 return V_BITS64_UNDEFINED;
5014 } else {
5015 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
5016 PROF_EVENT(MCPE_LOADV64_SLOW2);
5017 return mc_LOADVn_slow( a, 64, isBigEndian );
5020 #endif
5023 // Generic for all platforms
5024 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
5026 return mc_LOADV64(a, True);
5029 // Non-generic assembly for arm32-linux
5030 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5031 && defined(VGP_arm_linux)
5032 /* See mc_main_asm.c */
5034 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5035 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris) || defined(VGP_x86_freebsd))
5036 /* See mc_main_asm.c */
5038 #else
5039 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5040 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
5042 return mc_LOADV64(a, False);
5044 #endif
5046 /*------------------------------------------------------------*/
5047 /*--- STOREV64 ---*/
5048 /*------------------------------------------------------------*/
5050 static INLINE
5051 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
5053 PROF_EVENT(MCPE_STOREV64);
5055 #ifndef PERF_FAST_STOREV
5056 // XXX: this slow case seems to be marginally faster than the fast case!
5057 // Investigate further.
5058 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5059 #else
5061 UWord sm_off16, vabits16;
5062 SecMap* sm;
5064 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
5065 PROF_EVENT(MCPE_STOREV64_SLOW1);
5066 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5067 return;
5070 sm = get_secmap_for_reading_low(a);
5071 sm_off16 = SM_OFF_16(a);
5072 vabits16 = sm->vabits16[sm_off16];
5074 // To understand the below cleverness, see the extensive comments
5075 // in MC_(helperc_STOREV8).
5076 if (LIKELY(V_BITS64_DEFINED == vbits64)) {
5077 if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
5078 return;
5080 if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
5081 sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
5082 return;
5084 PROF_EVENT(MCPE_STOREV64_SLOW2);
5085 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5086 return;
5088 if (V_BITS64_UNDEFINED == vbits64) {
5089 if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
5090 return;
5092 if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
5093 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
5094 return;
5096 PROF_EVENT(MCPE_STOREV64_SLOW3);
5097 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5098 return;
5101 PROF_EVENT(MCPE_STOREV64_SLOW4);
5102 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5104 #endif
5107 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
5109 mc_STOREV64(a, vbits64, True);
5111 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
5113 mc_STOREV64(a, vbits64, False);
5116 /*------------------------------------------------------------*/
5117 /*--- LOADV32 ---*/
5118 /*------------------------------------------------------------*/
5120 static INLINE
5121 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
5123 PROF_EVENT(MCPE_LOADV32);
5125 #ifndef PERF_FAST_LOADV
5126 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5127 #else
5129 UWord sm_off, vabits8;
5130 SecMap* sm;
5132 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5133 PROF_EVENT(MCPE_LOADV32_SLOW1);
5134 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5137 sm = get_secmap_for_reading_low(a);
5138 sm_off = SM_OFF(a);
5139 vabits8 = sm->vabits8[sm_off];
5141 // Handle common case quickly: a is suitably aligned, is mapped, and the
5142 // entire word32 it lives in is addressible.
5143 // Convert V bits from compact memory form to expanded register form.
5144 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
5145 // Almost certainly not necessary, but be paranoid.
5146 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5147 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
5148 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
5149 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
5150 } else {
5151 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
5152 PROF_EVENT(MCPE_LOADV32_SLOW2);
5153 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5156 #endif
5159 // Generic for all platforms
5160 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
5162 return mc_LOADV32(a, True);
5165 // Non-generic assembly for arm32-linux
5166 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5167 && defined(VGP_arm_linux)
5168 /* See mc_main_asm.c */
5170 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5171 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5172 /* See mc_main_asm.c */
5174 #else
5175 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5176 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
5178 return mc_LOADV32(a, False);
5180 #endif
5182 /*------------------------------------------------------------*/
5183 /*--- STOREV32 ---*/
5184 /*------------------------------------------------------------*/
5186 static INLINE
5187 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
5189 PROF_EVENT(MCPE_STOREV32);
5191 #ifndef PERF_FAST_STOREV
5192 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5193 #else
5195 UWord sm_off, vabits8;
5196 SecMap* sm;
5198 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5199 PROF_EVENT(MCPE_STOREV32_SLOW1);
5200 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5201 return;
5204 sm = get_secmap_for_reading_low(a);
5205 sm_off = SM_OFF(a);
5206 vabits8 = sm->vabits8[sm_off];
5208 // To understand the below cleverness, see the extensive comments
5209 // in MC_(helperc_STOREV8).
5210 if (LIKELY(V_BITS32_DEFINED == vbits32)) {
5211 if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
5212 return;
5214 if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
5215 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
5216 return;
5218 PROF_EVENT(MCPE_STOREV32_SLOW2);
5219 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5220 return;
5222 if (V_BITS32_UNDEFINED == vbits32) {
5223 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
5224 return;
5226 if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
5227 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
5228 return;
5230 PROF_EVENT(MCPE_STOREV32_SLOW3);
5231 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5232 return;
5235 PROF_EVENT(MCPE_STOREV32_SLOW4);
5236 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5238 #endif
5241 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
5243 mc_STOREV32(a, vbits32, True);
5245 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
5247 mc_STOREV32(a, vbits32, False);
5250 /*------------------------------------------------------------*/
5251 /*--- LOADV16 ---*/
5252 /*------------------------------------------------------------*/
5254 static INLINE
5255 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
5257 PROF_EVENT(MCPE_LOADV16);
5259 #ifndef PERF_FAST_LOADV
5260 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5261 #else
5263 UWord sm_off, vabits8;
5264 SecMap* sm;
5266 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5267 PROF_EVENT(MCPE_LOADV16_SLOW1);
5268 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5271 sm = get_secmap_for_reading_low(a);
5272 sm_off = SM_OFF(a);
5273 vabits8 = sm->vabits8[sm_off];
5274 // Handle common case quickly: a is suitably aligned, is mapped, and is
5275 // addressible.
5276 // Convert V bits from compact memory form to expanded register form
5277 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; }
5278 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
5279 else {
5280 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5281 // the two sub-bytes.
5282 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
5283 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
5284 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
5285 else {
5286 /* Slow case: the two bytes are not all-defined or all-undefined. */
5287 PROF_EVENT(MCPE_LOADV16_SLOW2);
5288 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5292 #endif
5295 // Generic for all platforms
5296 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
5298 return mc_LOADV16(a, True);
/* Hand-written arm32 version of the 16-bit little-endian shadow load.
   The address is in r0 on entry.  The fast path looks up the shadow
   byte (VABITS8) for the address via primary_map and returns directly
   when the two bytes are all-defined or all-undefined; a misaligned
   address or any other shadow state ends in a call to
   mc_LOADVn_slow with r1 = 16 and r2 = 0 (r0 still holds the address). */
5301 // Non-generic assembly for arm32-linux
5302 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5303 && defined(VGP_arm_linux)
5304 __asm__( /* Derived from NCode template */
5305 ".text \n"
5306 ".align 2 \n"
5307 ".global vgMemCheck_helperc_LOADV16le \n"
5308 ".type vgMemCheck_helperc_LOADV16le, %function \n"
5309 "vgMemCheck_helperc_LOADV16le: \n" // entry: r0 = address
5310 " tst r0, #1 \n" // low address bit set?
5311 " bne .LLV16LEc12 \n" // if misaligned
5312 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5313 " movw r3, #:lower16:primary_map \n" // r3 = &primary_map (low half)
5314 " uxth r1, r0 \n" // r1 = sec-map-offB
5315 " movt r3, #:upper16:primary_map \n" // r3 = &primary_map (high half)
5316 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5317 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5318 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5319 " bne .LLV16LEc0 \n" // no, goto .LLV16LEc0
5320 ".LLV16LEh9: \n" // result: all defined
5321 " mov r0, #0xFFFFFFFF \n" //
5322 " lsl r0, r0, #16 \n" // V_BITS16_DEFINED | top16safe
5323 " bx lr \n" // return
5324 ".LLV16LEc0: \n" //
5325 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5326 " bne .LLV16LEc4 \n" // no, inspect the relevant 4 bits
5327 ".LLV16LEc2: \n" // result: all undefined
5328 " mov r0, #0xFFFFFFFF \n" // V_BITS16_UNDEFINED | top16safe
5329 " bx lr \n" // return
5330 ".LLV16LEc4: \n" //
5331 // r1 holds sec-map-VABITS8. r0 holds the address and is 2-aligned.
5332 // Extract the relevant 4 bits and inspect.
5333 " and r2, r0, #2 \n" // addr & 2
5334 " add r2, r2, r2 \n" // 2 * (addr & 2)
5335 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 2))
5336 " and r1, r1, #15 \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
5338 " cmp r1, #0xA \n" // VA_BITS4_DEFINED
5339 " beq .LLV16LEh9 \n" // yes: return the all-defined result
5341 " cmp r1, #0x5 \n" // VA_BITS4_UNDEFINED
5342 " beq .LLV16LEc2 \n" // yes: return the all-undefined result
// Slow path: reached by fall-through from above or by the misaligned
// branch at the top.
5344 ".LLV16LEc12: \n" //
5345 " push {r4, lr} \n" // save lr across the call; r4 is not used here, presumably pushed for stack alignment (TODO confirm)
5346 " mov r2, #0 \n" // third argument = 0
5347 " mov r1, #16 \n" // second argument = 16
5348 " bl mc_LOADVn_slow \n" // r0 (address) is passed through unchanged
5349 " pop {r4, pc} \n" // restore r4 and return to the caller
5350 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5351 ".previous\n"
/* Hand-written x86 version of the 16-bit little-endian shadow load.
   The address is in %eax on entry.  The fast path looks up the shadow
   byte for the address via primary_map and returns directly when the
   two bytes are all-defined or all-undefined; a misaligned address or
   any other shadow state tail-calls mc_LOADVn_slow(a, 16, 0) with the
   arguments in %eax, %edx and %ecx. */
5354 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5355 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5356 __asm__(
5357 ".text\n"
5358 ".align 16\n"
5359 ".global vgMemCheck_helperc_LOADV16le\n"
5360 ".type vgMemCheck_helperc_LOADV16le, @function\n"
5361 "vgMemCheck_helperc_LOADV16le:\n"
5362 " test $0x1, %eax\n"
5363 " jne .LLV16LE5\n" /* jump if not aligned */
5364 " mov %eax, %edx\n"
5365 " shr $0x10, %edx\n" /* edx = address >> 16, the primary_map index */
5366 " mov primary_map(,%edx,4), %ecx\n" /* ecx = primary_map[edx] */
5367 " movzwl %ax, %edx\n" /* edx = low 16 bits of the address */
5368 " shr $0x2, %edx\n" /* edx = byte offset of the shadow byte */
5369 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5370 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5371 " jne .LLV16LE2\n" /* jump if not all 32bits defined */
5372 ".LLV16LE1:\n"
5373 " mov $0xffff0000,%eax\n" /* V_BITS16_DEFINED | top16safe */
5374 " ret\n"
5375 ".LLV16LE2:\n"
5376 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5377 " jne .LLV16LE4\n" /* jump if not all 32bits undefined */
5378 ".LLV16LE3:\n"
5379 " or $0xffffffff,%eax\n" /* V_BITS16_UNDEFINED | top16safe */
5380 " ret\n"
5381 ".LLV16LE4:\n"
5382 " mov %eax, %ecx\n"
5383 " and $0x2, %ecx\n" /* ecx = address & 2 */
5384 " add %ecx, %ecx\n" /* ecx = 2 * (address & 2): shift of 0 or 4 */
5385 " sar %cl, %edx\n" /* move the relevant 4 VA bits to the bottom */
5386 " and $0xf, %edx\n" /* keep only those 4 bits */
5387 " cmp $0xa, %edx\n"
5388 " je .LLV16LE1\n" /* jump if all 16bits are defined */
5389 " cmp $0x5, %edx\n"
5390 " je .LLV16LE3\n" /* jump if all 16bits are undefined */
5391 ".LLV16LE5:\n"
5392 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 16, 0) */
5393 " mov $16, %edx\n"
5394 " jmp mc_LOADVn_slow\n"
5395 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5396 ".previous\n"
5399 #else
5400 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5401 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
5403 return mc_LOADV16(a, False);
5405 #endif
5407 /*------------------------------------------------------------*/
5408 /*--- STOREV16 ---*/
5409 /*------------------------------------------------------------*/
5411 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
5412 static INLINE
5413 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
5415 UInt shift;
5416 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
5417 shift = (a & 2) << 1; // shift by 0 or 4
5418 vabits8 >>= shift; // shift the four bits to the bottom
5419 // check 2 x vabits2 != VA_BITS2_NOACCESS
5420 return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
5421 && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
5424 static INLINE
5425 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
5427 PROF_EVENT(MCPE_STOREV16);
5429 #ifndef PERF_FAST_STOREV
5430 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5431 #else
5433 UWord sm_off, vabits8;
5434 SecMap* sm;
5436 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5437 PROF_EVENT(MCPE_STOREV16_SLOW1);
5438 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5439 return;
5442 sm = get_secmap_for_reading_low(a);
5443 sm_off = SM_OFF(a);
5444 vabits8 = sm->vabits8[sm_off];
5446 // To understand the below cleverness, see the extensive comments
5447 // in MC_(helperc_STOREV8).
5448 if (LIKELY(V_BITS16_DEFINED == vbits16)) {
5449 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5450 return;
5452 if (!is_distinguished_sm(sm)
5453 && accessible_vabits4_in_vabits8(a, vabits8)) {
5454 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
5455 &(sm->vabits8[sm_off]) );
5456 return;
5458 PROF_EVENT(MCPE_STOREV16_SLOW2);
5459 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5461 if (V_BITS16_UNDEFINED == vbits16) {
5462 if (vabits8 == VA_BITS8_UNDEFINED) {
5463 return;
5465 if (!is_distinguished_sm(sm)
5466 && accessible_vabits4_in_vabits8(a, vabits8)) {
5467 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
5468 &(sm->vabits8[sm_off]) );
5469 return;
5471 PROF_EVENT(MCPE_STOREV16_SLOW3);
5472 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5473 return;
5476 PROF_EVENT(MCPE_STOREV16_SLOW4);
5477 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5479 #endif
5483 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
5485 mc_STOREV16(a, vbits16, True);
5487 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
5489 mc_STOREV16(a, vbits16, False);
5492 /*------------------------------------------------------------*/
5493 /*--- LOADV8 ---*/
5494 /*------------------------------------------------------------*/
5496 /* Note: endianness is irrelevant for size == 1 */
/* Hand-written arm32 version of the 8-bit shadow load.  The address is
   in r0 on entry.  The fast path looks up the shadow byte (VABITS8)
   for the address via primary_map and returns directly when the byte
   is defined or undefined; any other shadow state ends in a call to
   mc_LOADVn_slow with r1 = 8 and r2 = 0 (r0 still holds the address).
   There is no alignment test: a single byte is always aligned. */
5498 // Non-generic assembly for arm32-linux
5499 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5500 && defined(VGP_arm_linux)
5501 __asm__( /* Derived from NCode template */
5502 ".text \n"
5503 ".align 2 \n"
5504 ".global vgMemCheck_helperc_LOADV8 \n"
5505 ".type vgMemCheck_helperc_LOADV8, %function \n"
5506 "vgMemCheck_helperc_LOADV8: \n" // entry: r0 = address
5507 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5508 " movw r3, #:lower16:primary_map \n" // r3 = &primary_map (low half)
5509 " uxth r1, r0 \n" // r1 = sec-map-offB
5510 " movt r3, #:upper16:primary_map \n" // r3 = &primary_map (high half)
5511 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5512 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5513 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5514 " bne .LLV8c0 \n" // no, goto .LLV8c0
5515 ".LLV8h9: \n" // result: defined
5516 " mov r0, #0xFFFFFF00 \n" // V_BITS8_DEFINED | top24safe
5517 " bx lr \n" // return
5518 ".LLV8c0: \n" //
5519 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5520 " bne .LLV8c4 \n" // no, inspect the relevant 2 bits
5521 ".LLV8c2: \n" // result: undefined
5522 " mov r0, #0xFFFFFFFF \n" // V_BITS8_UNDEFINED | top24safe
5523 " bx lr \n" // return
5524 ".LLV8c4: \n" //
5525 // r1 holds sec-map-VABITS8
5526 // r0 holds the address. Extract the relevant 2 bits and inspect.
5527 " and r2, r0, #3 \n" // addr & 3
5528 " add r2, r2, r2 \n" // 2 * (addr & 3)
5529 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5530 " and r1, r1, #3 \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5532 " cmp r1, #2 \n" // VA_BITS2_DEFINED
5533 " beq .LLV8h9 \n" // yes: return the defined result
5535 " cmp r1, #1 \n" // VA_BITS2_UNDEFINED
5536 " beq .LLV8c2 \n" // yes: return the undefined result
// Slow path: neither defined nor undefined.
5538 " push {r4, lr} \n" // save lr across the call; r4 is not used here, presumably pushed for stack alignment (TODO confirm)
5539 " mov r2, #0 \n" // third argument = 0
5540 " mov r1, #8 \n" // second argument = 8
5541 " bl mc_LOADVn_slow \n" // r0 (address) is passed through unchanged
5542 " pop {r4, pc} \n" // restore r4 and return to the caller
5543 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5544 ".previous\n"
/* Hand-written x86 version of the 8-bit shadow load.  The address is
   in %eax on entry.  The fast path looks up the shadow byte for the
   address via primary_map and returns directly when the byte is
   defined or undefined; any other shadow state tail-calls
   mc_LOADVn_slow(a, 8, 0) with the arguments in %eax, %edx and %ecx. */
5547 /* Non-generic assembly for x86-linux */
5548 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5549 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5550 __asm__(
5551 ".text\n"
5552 ".align 16\n"
5553 ".global vgMemCheck_helperc_LOADV8\n"
5554 ".type vgMemCheck_helperc_LOADV8, @function\n"
5555 "vgMemCheck_helperc_LOADV8:\n"
5556 " mov %eax, %edx\n"
5557 " shr $0x10, %edx\n" /* edx = address >> 16, the primary_map index */
5558 " mov primary_map(,%edx,4), %ecx\n" /* ecx = primary_map[edx] */
5559 " movzwl %ax, %edx\n" /* edx = low 16 bits of the address */
5560 " shr $0x2, %edx\n" /* edx = byte offset of the shadow byte */
5561 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5562 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED? */
5563 " jne .LLV8LE2\n" /* jump if not defined */
5564 ".LLV8LE1:\n"
5565 " mov $0xffffff00, %eax\n" /* V_BITS8_DEFINED | top24safe */
5566 " ret\n"
5567 ".LLV8LE2:\n"
5568 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5569 " jne .LLV8LE4\n" /* jump if not all 32bits are undefined */
5570 ".LLV8LE3:\n"
5571 " or $0xffffffff, %eax\n" /* V_BITS8_UNDEFINED | top24safe */
5572 " ret\n"
5573 ".LLV8LE4:\n"
5574 " mov %eax, %ecx\n"
5575 " and $0x3, %ecx\n" /* ecx = address & 3 */
5576 " add %ecx, %ecx\n" /* ecx = 2 * (address & 3): shift of 0, 2, 4 or 6 */
5577 " sar %cl, %edx\n" /* move the relevant 2 VA bits to the bottom */
5578 " and $0x3, %edx\n" /* keep only those 2 bits */
5579 " cmp $0x2, %edx\n"
5580 " je .LLV8LE1\n" /* jump if all 8bits are defined */
5581 " cmp $0x1, %edx\n"
5582 " je .LLV8LE3\n" /* jump if all 8bits are undefined */
5583 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 8, 0) */
5584 " mov $0x8, %edx\n"
5585 " jmp mc_LOADVn_slow\n"
5586 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5587 ".previous\n"
5590 #else
5591 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5592 VG_REGPARM(1)
5593 UWord MC_(helperc_LOADV8) ( Addr a )
5595 PROF_EVENT(MCPE_LOADV8);
5597 #ifndef PERF_FAST_LOADV
5598 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5599 #else
5601 UWord sm_off, vabits8;
5602 SecMap* sm;
5604 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5605 PROF_EVENT(MCPE_LOADV8_SLOW1);
5606 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5609 sm = get_secmap_for_reading_low(a);
5610 sm_off = SM_OFF(a);
5611 vabits8 = sm->vabits8[sm_off];
5612 // Convert V bits from compact memory form to expanded register form
5613 // Handle common case quickly: a is mapped, and the entire
5614 // word32 it lives in is addressible.
5615 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; }
5616 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
5617 else {
5618 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5619 // the single byte.
5620 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
5621 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; }
5622 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
5623 else {
5624 /* Slow case: the byte is not all-defined or all-undefined. */
5625 PROF_EVENT(MCPE_LOADV8_SLOW2);
5626 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5630 #endif
5632 #endif
5634 /*------------------------------------------------------------*/
5635 /*--- STOREV8 ---*/
5636 /*------------------------------------------------------------*/
5638 VG_REGPARM(2)
5639 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
5641 PROF_EVENT(MCPE_STOREV8);
5643 #ifndef PERF_FAST_STOREV
5644 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5645 #else
5647 UWord sm_off, vabits8;
5648 SecMap* sm;
5650 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5651 PROF_EVENT(MCPE_STOREV8_SLOW1);
5652 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5653 return;
5656 sm = get_secmap_for_reading_low(a);
5657 sm_off = SM_OFF(a);
5658 vabits8 = sm->vabits8[sm_off];
5660 // Clevernesses to speed up storing V bits.
5661 // The 64/32/16 bit cases also have similar clevernesses, but it
5662 // works a little differently to the code below.
5664 // Cleverness 1: sometimes we don't have to write the shadow memory at
5665 // all, if we can tell that what we want to write is the same as what is
5666 // already there. These cases are marked below as "defined on defined" and
5667 // "undefined on undefined".
5669 // Cleverness 2:
5670 // We also avoid to call mc_STOREVn_slow if the V bits can directly
5671 // be written in the secondary map. V bits can be directly written
5672 // if 4 conditions are respected:
5673 // * The address for which V bits are written is naturally aligned
5674 // on 1 byte for STOREV8 (this is always true)
5675 // on 2 bytes for STOREV16
5676 // on 4 bytes for STOREV32
5677 // on 8 bytes for STOREV64.
5678 // * V bits being written are either fully defined or fully undefined.
5679 // (for partially defined V bits, V bits cannot be directly written,
5680 // as the secondary vbits table must be maintained).
5681 // * the secmap is not distinguished (distinguished maps cannot be
5682 // modified).
5683 // * the memory corresponding to the V bits being written is
5684 // accessible (if one or more bytes are not accessible,
5685 // we must call mc_STOREVn_slow in order to report accessibility
5686 // errors).
5687 // Note that for STOREV32 and STOREV64, it is too expensive
5688 // to verify the accessibility of each byte for the benefit it
5689 // brings. Instead, a quicker check is done by comparing to
5690 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
5691 // but misses some opportunity of direct modifications.
5692 // Checking each byte accessibility was measured for
5693 // STOREV32+perf tests and was slowing down all perf tests.
5694 // The cases corresponding to cleverness 2 are marked below as
5695 // "direct mod".
5696 if (LIKELY(V_BITS8_DEFINED == vbits8)) {
5697 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5698 return; // defined on defined
5700 if (!is_distinguished_sm(sm)
5701 && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
5702 // direct mod
5703 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
5704 &(sm->vabits8[sm_off]) );
5705 return;
5707 PROF_EVENT(MCPE_STOREV8_SLOW2);
5708 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5709 return;
5711 if (V_BITS8_UNDEFINED == vbits8) {
5712 if (vabits8 == VA_BITS8_UNDEFINED) {
5713 return; // undefined on undefined
5715 if (!is_distinguished_sm(sm)
5716 && (VA_BITS2_NOACCESS
5717 != extract_vabits2_from_vabits8(a, vabits8))) {
5718 // direct mod
5719 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
5720 &(sm->vabits8[sm_off]) );
5721 return;
5723 PROF_EVENT(MCPE_STOREV8_SLOW3);
5724 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5725 return;
5728 // Partially defined word
5729 PROF_EVENT(MCPE_STOREV8_SLOW4);
5730 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5732 #endif
5736 /*------------------------------------------------------------*/
5737 /*--- Functions called directly from generated code: ---*/
5738 /*--- Value-check failure handlers. ---*/
5739 /*------------------------------------------------------------*/
5741 /* Call these ones when an origin is available ... */
5742 VG_REGPARM(1)
5743 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
5744 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
5747 VG_REGPARM(1)
5748 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
5749 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
5752 VG_REGPARM(1)
5753 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
5754 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
5757 VG_REGPARM(1)
5758 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
5759 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
5762 VG_REGPARM(2)
5763 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
5764 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
5767 /* ... and these when an origin isn't available. */
5769 VG_REGPARM(0)
5770 void MC_(helperc_value_check0_fail_no_o) ( void ) {
5771 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
5774 VG_REGPARM(0)
5775 void MC_(helperc_value_check1_fail_no_o) ( void ) {
5776 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
5779 VG_REGPARM(0)
5780 void MC_(helperc_value_check4_fail_no_o) ( void ) {
5781 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
5784 VG_REGPARM(0)
5785 void MC_(helperc_value_check8_fail_no_o) ( void ) {
5786 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
5789 VG_REGPARM(1)
5790 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
5791 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
5795 /*------------------------------------------------------------*/
5796 /*--- Metadata get/set functions, for client requests. ---*/
5797 /*------------------------------------------------------------*/
5799 // Nb: this expands the V+A bits out into register-form V bits, even though
5800 // they're in memory. This is for backward compatibility, and because it's
5801 // probably what the user wants.
5803 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5804 error [no longer used], 3 == addressing error. */
5805 /* Nb: We used to issue various definedness/addressability errors from here,
5806 but we took them out because they ranged from not-very-helpful to
5807 downright annoying, and they complicated the error data structures. */
5808 static Int mc_get_or_set_vbits_for_client (
5809 Addr a,
5810 Addr vbits,
5811 SizeT szB,
5812 Bool setting, /* True <=> set vbits, False <=> get vbits */
5813 Bool is_client_request /* True <=> real user request
5814 False <=> internal call from gdbserver */
5817 SizeT i;
5818 Bool ok;
5819 UChar vbits8;
5821 /* Check that arrays are addressible before doing any getting/setting.
5822 vbits to be checked only for real user request. */
5823 for (i = 0; i < szB; i++) {
5824 if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
5825 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
5826 return 3;
5830 /* Do the copy */
5831 if (setting) {
5832 /* setting */
5833 for (i = 0; i < szB; i++) {
5834 ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
5835 tl_assert(ok);
5837 } else {
5838 /* getting */
5839 for (i = 0; i < szB; i++) {
5840 ok = get_vbits8(a + i, &vbits8);
5841 tl_assert(ok);
5842 ((UChar*)vbits)[i] = vbits8;
5844 if (is_client_request)
5845 // The bytes in vbits[] have now been set, so mark them as such.
5846 MC_(make_mem_defined)(vbits, szB);
5849 return 1;
5853 /*------------------------------------------------------------*/
5854 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
5855 /*------------------------------------------------------------*/
5857 /* For the memory leak detector, say whether an entire 64k chunk of
5858 address space is possibly in use, or not. If in doubt return
5859 True.
5861 Bool MC_(is_within_valid_secondary) ( Addr a )
5863 SecMap* sm = maybe_get_secmap_for ( a );
5864 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
5865 /* Definitely not in use. */
5866 return False;
5867 } else {
5868 return True;
5873 /* For the memory leak detector, say whether or not a given word
5874 address is to be regarded as valid. */
5875 Bool MC_(is_valid_aligned_word) ( Addr a )
5877 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5878 tl_assert(VG_IS_WORD_ALIGNED(a));
5879 if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5880 return False;
5881 if (sizeof(UWord) == 8) {
5882 if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5883 return False;
5885 if (UNLIKELY(MC_(in_ignored_range)(a)))
5886 return False;
5887 else
5888 return True;
5892 /*------------------------------------------------------------*/
5893 /*--- Initialisation ---*/
5894 /*------------------------------------------------------------*/
5896 static void init_shadow_memory ( void )
5898 Int i;
5899 SecMap* sm;
5901 tl_assert(V_BIT_UNDEFINED == 1);
5902 tl_assert(V_BIT_DEFINED == 0);
5903 tl_assert(V_BITS8_UNDEFINED == 0xFF);
5904 tl_assert(V_BITS8_DEFINED == 0);
5906 /* Build the 3 distinguished secondaries */
5907 sm = &sm_distinguished[SM_DIST_NOACCESS];
5908 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5910 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5911 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5913 sm = &sm_distinguished[SM_DIST_DEFINED];
5914 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5916 /* Set up the primary map. */
5917 /* These entries gradually get overwritten as the used address
5918 space expands. */
5919 for (i = 0; i < N_PRIMARY_MAP; i++)
5920 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5922 /* Auxiliary primary maps */
5923 init_auxmap_L1_L2();
5925 /* auxmap_size = auxmap_used = 0;
5926 no ... these are statically initialised */
5928 /* Secondary V bit table */
5929 secVBitTable = createSecVBitTable();
5933 /*------------------------------------------------------------*/
5934 /*--- Sanity check machinery (permanently engaged) ---*/
5935 /*------------------------------------------------------------*/
5937 static Bool mc_cheap_sanity_check ( void )
5939 n_sanity_cheap++;
5940 PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
5941 /* Check for sane operating level */
5942 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5943 return False;
5944 /* nothing else useful we can rapidly check */
5945 return True;
5948 static Bool mc_expensive_sanity_check ( void )
5950 Int i;
5951 Word n_secmaps_found;
5952 SecMap* sm;
5953 const HChar* errmsg;
5954 Bool bad = False;
5956 if (0) VG_(printf)("expensive sanity check\n");
5957 if (0) return True;
5959 n_sanity_expensive++;
5960 PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);
5962 /* Check for sane operating level */
5963 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5964 return False;
5966 /* Check that the 3 distinguished SMs are still as they should be. */
5968 /* Check noaccess DSM. */
5969 sm = &sm_distinguished[SM_DIST_NOACCESS];
5970 for (i = 0; i < SM_CHUNKS; i++)
5971 if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5972 bad = True;
5974 /* Check undefined DSM. */
5975 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5976 for (i = 0; i < SM_CHUNKS; i++)
5977 if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5978 bad = True;
5980 /* Check defined DSM. */
5981 sm = &sm_distinguished[SM_DIST_DEFINED];
5982 for (i = 0; i < SM_CHUNKS; i++)
5983 if (sm->vabits8[i] != VA_BITS8_DEFINED)
5984 bad = True;
5986 if (bad) {
5987 VG_(printf)("memcheck expensive sanity: "
5988 "distinguished_secondaries have changed\n");
5989 return False;
5992 /* If we're not checking for undefined value errors, the secondary V bit
5993 * table should be empty. */
5994 if (MC_(clo_mc_level) == 1) {
5995 if (0 != VG_(OSetGen_Size)(secVBitTable))
5996 return False;
5999 /* check the auxiliary maps, very thoroughly */
6000 n_secmaps_found = 0;
6001 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
6002 if (errmsg) {
6003 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
6004 return False;
6007 /* n_secmaps_found is now the number referred to by the auxiliary
6008 primary map. Now add on the ones referred to by the main
6009 primary map. */
6010 for (i = 0; i < N_PRIMARY_MAP; i++) {
6011 if (primary_map[i] == NULL) {
6012 bad = True;
6013 } else {
6014 if (!is_distinguished_sm(primary_map[i]))
6015 n_secmaps_found++;
6019 /* check that the number of secmaps issued matches the number that
6020 are reachable (iow, no secmap leaks) */
6021 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
6022 bad = True;
6024 if (bad) {
6025 VG_(printf)("memcheck expensive sanity: "
6026 "apparent secmap leakage\n");
6027 return False;
6030 if (bad) {
6031 VG_(printf)("memcheck expensive sanity: "
6032 "auxmap covers wrong address space\n");
6033 return False;
6036 /* there is only one pointer to each secmap (expensive) */
6038 return True;
6041 /*------------------------------------------------------------*/
6042 /*--- Command line args ---*/
6043 /*------------------------------------------------------------*/
/* Default values of Memcheck's command-line-controlled settings.  The
   option parser mc_process_cmd_line_options (below) overwrites them. */
6045 /* 31 Aug 2015: Vectorised code is now so widespread that
6046 --partial-loads-ok needs to be enabled by default on all platforms.
6047 Not doing so causes lots of false errors. */
6048 Bool MC_(clo_partial_loads_ok) = True;
/* --freelist-vol and --freelist-big-blocks: the parser accepts values
   in the range 0 .. 10*1000*1000*1000. */
6049 Long MC_(clo_freelist_vol) = 20LL*1000LL*1000LL;
6050 Long MC_(clo_freelist_big_blocks) = 1LL*1000LL*1000LL;
/* --leak-check=no|summary|yes|full selects LC_Off, LC_Summary, LC_Full
   and LC_Full respectively. */
6051 LeakCheckMode MC_(clo_leak_check) = LC_Summary;
/* --leak-resolution=low|med|high selects Vg_LowRes, Vg_MedRes, Vg_HighRes. */
6052 VgRes MC_(clo_leak_resolution) = Vg_HighRes;
/* Bit sets built with R2S(); set by --show-leak-kinds and
   --errors-for-leak-kinds. */
6053 UInt MC_(clo_show_leak_kinds) = R2S(Possible) | R2S(Unreached);
6054 UInt MC_(clo_error_for_leak_kinds) = R2S(Possible) | R2S(Unreached);
/* Bit set built with H2S(); set by --leak-check-heuristics. */
6055 UInt MC_(clo_leak_check_heuristics) = H2S(LchStdString)
6056 | H2S( LchLength64)
6057 | H2S( LchNewArray)
6058 | H2S( LchMultipleInheritance);
6059 Bool MC_(clo_xtree_leak) = False;
6060 const HChar* MC_(clo_xtree_leak_file) = "xtleak.kcg.%p";
6061 Bool MC_(clo_workaround_gcc296_bugs) = False;
/* --malloc-fill and --free-fill accept 0x00 .. 0xFF; the default -1 lies
   outside that range, presumably meaning "do not fill" (TODO confirm). */
6062 Int MC_(clo_malloc_fill) = -1;
6063 Int MC_(clo_free_fill) = -1;
/* --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none */
6064 KeepStacktraces MC_(clo_keep_stacktraces) = KS_alloc_and_free;
/* Operating level, always within 1..3:
      1 = A bit tracking only
      2 = A and V bit tracking, but no V bit origins
      3 = A and V bit tracking, and V bit origins
   Derived by the parser from --undef-value-errors= and --track-origins=. */
6065 Int MC_(clo_mc_level) = 2;
6066 Bool MC_(clo_show_mismatched_frees) = True;
6067 Bool MC_(clo_show_realloc_size_zero) = True;
6069 ExpensiveDefinednessChecks
6070 MC_(clo_expensive_definedness_checks) = EdcAUTO;
/* --ignore-range-below-sp=first,last: the parser requires first > last,
   both offsets <= 1000*1000 and first - last <= 4096, then sets the flag
   and stores the two offsets. */
6072 Bool MC_(clo_ignore_range_below_sp) = False;
6073 UInt MC_(clo_ignore_range_below_sp__first_offset) = 0;
6074 UInt MC_(clo_ignore_range_below_sp__last_offset) = 0;
/* Token list handed to the set parser for --leak-check-heuristics. */
6076 static const HChar * MC_(parse_leak_heuristics_tokens) =
6077 "-,stdstring,length64,newarray,multipleinheritance";
6078 /* The first heuristic value (LchNone) has no keyword, as this is
6079 a fake heuristic used to collect the blocks found without any
6080 heuristic. */
6082 static Bool mc_process_cmd_line_options(const HChar* arg)
6084 const HChar* tmp_str;
6085 Bool tmp_show;
6087 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6089 /* Set MC_(clo_mc_level):
6090 1 = A bit tracking only
6091 2 = A and V bit tracking, but no V bit origins
6092 3 = A and V bit tracking, and V bit origins
6094 Do this by inspecting --undef-value-errors= and
6095 --track-origins=. Reject the case --undef-value-errors=no
6096 --track-origins=yes as meaningless.
6098 if VG_BOOL_CLO(arg, "--undef-value-errors", tmp_show) {
6099 if (tmp_show) {
6100 if (MC_(clo_mc_level) == 1)
6101 MC_(clo_mc_level) = 2;
6102 } else {
6103 if (MC_(clo_mc_level) == 3) {
6104 goto bad_level;
6105 } else {
6106 MC_(clo_mc_level) = 1;
6110 else if VG_BOOL_CLO(arg, "--track-origins", tmp_show) {
6111 if (tmp_show) {
6112 if (MC_(clo_mc_level) == 1) {
6113 goto bad_level;
6114 } else {
6115 MC_(clo_mc_level) = 3;
6117 } else {
6118 if (MC_(clo_mc_level) == 3)
6119 MC_(clo_mc_level) = 2;
6122 else if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
6123 else if VG_USET_CLOM(cloPD, arg, "--errors-for-leak-kinds",
6124 MC_(parse_leak_kinds_tokens),
6125 MC_(clo_error_for_leak_kinds)) {}
6126 else if VG_USET_CLOM(cloPD, arg, "--show-leak-kinds",
6127 MC_(parse_leak_kinds_tokens),
6128 MC_(clo_show_leak_kinds)) {}
6129 else if VG_USET_CLOM(cloPD, arg, "--leak-check-heuristics",
6130 MC_(parse_leak_heuristics_tokens),
6131 MC_(clo_leak_check_heuristics)) {}
6132 else if (VG_BOOL_CLOM(cloPD, arg, "--show-reachable", tmp_show)) {
6133 if (tmp_show) {
6134 MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
6135 } else {
6136 MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
6139 else if VG_BOOL_CLOM(cloPD, arg, "--show-possibly-lost", tmp_show) {
6140 if (tmp_show) {
6141 MC_(clo_show_leak_kinds) |= R2S(Possible);
6142 } else {
6143 MC_(clo_show_leak_kinds) &= ~R2S(Possible);
6146 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
6147 MC_(clo_workaround_gcc296_bugs)) {}
6149 else if VG_BINT_CLOM(cloPD, arg, "--freelist-vol", MC_(clo_freelist_vol),
6150 0, 10*1000*1000*1000LL) {}
6152 else if VG_BINT_CLOM(cloPD, arg, "--freelist-big-blocks",
6153 MC_(clo_freelist_big_blocks),
6154 0, 10*1000*1000*1000LL) {}
6156 else if VG_XACT_CLOM(cloPD, arg, "--leak-check=no",
6157 MC_(clo_leak_check), LC_Off) {}
6158 else if VG_XACT_CLOM(cloPD, arg, "--leak-check=summary",
6159 MC_(clo_leak_check), LC_Summary) {}
6160 else if VG_XACT_CLOM(cloPD, arg, "--leak-check=yes",
6161 MC_(clo_leak_check), LC_Full) {}
6162 else if VG_XACT_CLOM(cloPD, arg, "--leak-check=full",
6163 MC_(clo_leak_check), LC_Full) {}
6165 else if VG_XACT_CLO(arg, "--leak-resolution=low",
6166 MC_(clo_leak_resolution), Vg_LowRes) {}
6167 else if VG_XACT_CLO(arg, "--leak-resolution=med",
6168 MC_(clo_leak_resolution), Vg_MedRes) {}
6169 else if VG_XACT_CLO(arg, "--leak-resolution=high",
6170 MC_(clo_leak_resolution), Vg_HighRes) {}
6172 else if VG_STR_CLOM(cloPD, arg, "--ignore-ranges", tmp_str) {
6173 Bool ok = parse_ignore_ranges(tmp_str);
6174 if (!ok) {
6175 VG_(message)(Vg_DebugMsg,
6176 "ERROR: --ignore-ranges: "
6177 "invalid syntax, or end <= start in range\n");
6178 return False;
6180 if (gIgnoredAddressRanges) {
6181 UInt i;
6182 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
6183 UWord val = IAR_INVALID;
6184 UWord key_min = ~(UWord)0;
6185 UWord key_max = (UWord)0;
6186 VG_(indexRangeMap)( &key_min, &key_max, &val,
6187 gIgnoredAddressRanges, i );
6188 tl_assert(key_min <= key_max);
6189 UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
6190 if (key_max - key_min > limit && val == IAR_CommandLine) {
6191 VG_(message)(Vg_DebugMsg,
6192 "ERROR: --ignore-ranges: suspiciously large range:\n");
6193 VG_(message)(Vg_DebugMsg,
6194 " 0x%lx-0x%lx (size %lu)\n", key_min, key_max,
6195 key_max - key_min + 1);
6196 return False;
6202 else if VG_STR_CLOM(cloPD, arg, "--ignore-range-below-sp", tmp_str) {
6203 /* This seems at first a bit weird, but: in order to imply
6204 a non-wrapped-around address range, the first offset needs to be
6205 larger than the second one. For example
6206 --ignore-range-below-sp=8192,8189
6207 would cause accesses to in the range [SP-8192, SP-8189] to be
6208 ignored. */
6209 UInt offs1 = 0, offs2 = 0;
6210 Bool ok = parse_UInt_pair(&tmp_str, &offs1, &offs2);
6211 // Ensure we used all the text after the '=' sign.
6212 if (ok && *tmp_str != 0) ok = False;
6213 if (!ok) {
6214 VG_(message)(Vg_DebugMsg,
6215 "ERROR: --ignore-range-below-sp: invalid syntax. "
6216 " Expected \"...=decimalnumber-decimalnumber\".\n");
6217 return False;
6219 if (offs1 > 1000*1000 /*arbitrary*/ || offs2 > 1000*1000 /*ditto*/) {
6220 VG_(message)(Vg_DebugMsg,
6221 "ERROR: --ignore-range-below-sp: suspiciously large "
6222 "offset(s): %u and %u\n", offs1, offs2);
6223 return False;
6225 if (offs1 <= offs2) {
6226 VG_(message)(Vg_DebugMsg,
6227 "ERROR: --ignore-range-below-sp: invalid offsets "
6228 "(the first must be larger): %u and %u\n", offs1, offs2);
6229 return False;
6231 tl_assert(offs1 > offs2);
6232 if (offs1 - offs2 > 4096 /*arbitrary*/) {
6233 VG_(message)(Vg_DebugMsg,
6234 "ERROR: --ignore-range-below-sp: suspiciously large "
6235 "range: %u-%u (size %u)\n", offs1, offs2, offs1 - offs2);
6236 return False;
6238 MC_(clo_ignore_range_below_sp) = True;
6239 MC_(clo_ignore_range_below_sp__first_offset) = offs1;
6240 MC_(clo_ignore_range_below_sp__last_offset) = offs2;
6241 return True;
6244 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
6245 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}
6247 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
6248 MC_(clo_keep_stacktraces), KS_alloc) {}
6249 else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
6250 MC_(clo_keep_stacktraces), KS_free) {}
6251 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
6252 MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
6253 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
6254 MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
6255 else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
6256 MC_(clo_keep_stacktraces), KS_none) {}
6258 else if VG_BOOL_CLOM(cloPD, arg, "--show-mismatched-frees",
6259 MC_(clo_show_mismatched_frees)) {}
6260 else if VG_BOOL_CLOM(cloPD, arg, "--show-realloc-size-zero",
6261 MC_(clo_show_realloc_size_zero)) {}
6263 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=no",
6264 MC_(clo_expensive_definedness_checks), EdcNO) {}
6265 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=auto",
6266 MC_(clo_expensive_definedness_checks), EdcAUTO) {}
6267 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=yes",
6268 MC_(clo_expensive_definedness_checks), EdcYES) {}
6270 else if VG_BOOL_CLO(arg, "--xtree-leak",
6271 MC_(clo_xtree_leak)) {}
6272 else if VG_STR_CLO (arg, "--xtree-leak-file",
6273 MC_(clo_xtree_leak_file)) {}
6275 else
6276 return VG_(replacement_malloc_process_cmd_line_option)(arg);
6278 return True;
6281 bad_level:
6282 VG_(fmsg_bad_option)(arg,
6283 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
6284 return False;
6287 static void mc_print_usage(void)
6289 VG_(printf)(
6290 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
6291 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
6292 " --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
6293 " [definite,possible]\n"
6294 " --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
6295 " [definite,possible]\n"
6296 " where kind is one of:\n"
6297 " definite indirect possible reachable all none\n"
6298 " --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
6299 " improving leak search false positive [all]\n"
6300 " where heur is one of:\n"
6301 " stdstring length64 newarray multipleinheritance all none\n"
6302 " --show-reachable=yes same as --show-leak-kinds=all\n"
6303 " --show-reachable=no --show-possibly-lost=yes\n"
6304 " same as --show-leak-kinds=definite,possible\n"
6305 " --show-reachable=no --show-possibly-lost=no\n"
6306 " same as --show-leak-kinds=definite\n"
6307 " --xtree-leak=no|yes output leak result in xtree format? [no]\n"
6308 " --xtree-leak-file=<file> xtree leak report file [xtleak.kcg.%%p]\n"
6309 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
6310 " --track-origins=no|yes show origins of undefined values? [no]\n"
6311 " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
6312 " --expensive-definedness-checks=no|auto|yes\n"
6313 " Use extra-precise definedness tracking [auto]\n"
6314 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
6315 " --freelist-big-blocks=<number> releases first blocks with size>= [1000000]\n"
6316 " --workaround-gcc296-bugs=no|yes self explanatory [no]. Deprecated.\n"
6317 " Use --ignore-range-below-sp instead.\n"
6318 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
6319 " --ignore-range-below-sp=<number>-<number> do not report errors for\n"
6320 " accesses at the given offsets below SP\n"
6321 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
6322 " --free-fill=<hexnumber> fill free'd areas with given value\n"
6323 " --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
6324 " stack trace(s) to keep for malloc'd/free'd areas [alloc-and-free]\n"
6325 " --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
6326 " --show-realloc-size-zero=no|yes show realocs with a size of zero? [yes]\n"
6330 static void mc_print_debug_usage(void)
6332 VG_(printf)(
6333 " (none)\n"
6338 /*------------------------------------------------------------*/
6339 /*--- Client blocks ---*/
6340 /*------------------------------------------------------------*/
6342 /* Client block management:
6344 This is managed as an expanding array of client block descriptors.
6345 Indices of live descriptors are issued to the client, so it can ask
6346 to free them later. Therefore we cannot slide live entries down
6347 over dead ones. Instead we must use free/inuse flags and scan for
6348 an empty slot at allocation time. This in turn means allocation is
6349 relatively expensive, so we hope this does not happen too often.
6351 An unused block has start == size == 0
6354 /* type CGenBlock is defined in mc_include.h */
6356 /* This subsystem is self-initialising. */
6357 static UWord cgb_size = 0;
6358 static UWord cgb_used = 0;
6359 static CGenBlock* cgbs = NULL;
6361 /* Stats for this subsystem. */
6362 static ULong cgb_used_MAX = 0; /* Max in use. */
6363 static ULong cgb_allocs = 0; /* Number of allocs. */
6364 static ULong cgb_discards = 0; /* Number of discards. */
6365 static ULong cgb_search = 0; /* Number of searches. */
6368 /* Get access to the client block array. */
6369 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
6370 /*OUT*/UWord* nBlocks )
6372 *blocks = cgbs;
6373 *nBlocks = cgb_used;
6377 static
6378 Int alloc_client_block ( void )
6380 UWord i, sz_new;
6381 CGenBlock* cgbs_new;
6383 cgb_allocs++;
6385 for (i = 0; i < cgb_used; i++) {
6386 cgb_search++;
6387 if (cgbs[i].start == 0 && cgbs[i].size == 0)
6388 return i;
6391 /* Not found. Try to allocate one at the end. */
6392 if (cgb_used < cgb_size) {
6393 cgb_used++;
6394 return cgb_used-1;
6397 /* Ok, we have to allocate a new one. */
6398 tl_assert(cgb_used == cgb_size);
6399 sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
6401 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
6402 for (i = 0; i < cgb_used; i++)
6403 cgbs_new[i] = cgbs[i];
6405 if (cgbs != NULL)
6406 VG_(free)( cgbs );
6407 cgbs = cgbs_new;
6409 cgb_size = sz_new;
6410 cgb_used++;
6411 if (cgb_used > cgb_used_MAX)
6412 cgb_used_MAX = cgb_used;
6413 return cgb_used-1;
6417 static void show_client_block_stats ( void )
6419 VG_(message)(Vg_DebugMsg,
6420 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6421 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
6424 static void print_monitor_help ( void )
6426 VG_(gdb_printf)
6428 "\n"
6429 "memcheck monitor commands:\n"
6430 " xb <addr> [<len>]\n"
6431 " prints validity bits for <len> (or 1) bytes at <addr>\n"
6432 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6433 " Then prints the bytes values below the corresponding validity bits\n"
6434 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6435 " Example: xb 0x8049c78 10\n"
6436 " get_vbits <addr> [<len>]\n"
6437 " Similar to xb, but only prints the validity bytes by group of 4.\n"
6438 " make_memory [noaccess|undefined\n"
6439 " |defined|Definedifaddressable] <addr> [<len>]\n"
6440 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6441 " check_memory [addressable|defined] <addr> [<len>]\n"
6442 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6443 " and outputs a description of <addr>\n"
6444 " leak_check [full*|summary|xtleak]\n"
6445 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6446 " [heuristics heur1,heur2,...]\n"
6447 " [new|increased*|changed|any]\n"
6448 " [unlimited*|limited <max_loss_records_output>]\n"
6449 " * = defaults\n"
6450 " xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
6451 " where kind is one of:\n"
6452 " definite indirect possible reachable all none\n"
6453 " where heur is one of:\n"
6454 " stdstring length64 newarray multipleinheritance all none*\n"
6455 " Examples: leak_check\n"
6456 " leak_check summary any\n"
6457 " leak_check full kinds indirect,possible\n"
6458 " leak_check full reachable any limited 100\n"
6459 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6460 " [unlimited*|limited <max_blocks>]\n"
6461 " [heuristics heur1,heur2,...]\n"
6462 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6463 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6464 " With heuristics, only shows the blocks found via heur1,heur2,...\n"
6465 " * = defaults\n"
6466 " who_points_at <addr> [<len>]\n"
6467 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6468 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6469 " with len > 1, will also show \"interior pointers\")\n"
6470 " xtmemory [<filename>]\n"
6471 " dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
6472 "\n");
6475 /* Print szB bytes at address, with a format similar to the gdb command
6476 x /<szB>xb address.
6477 res[i] == 1 indicates the corresponding byte is addressable. */
6478 static void gdb_xb (Addr address, SizeT szB, Int res[])
6480 UInt i;
6482 for (i = 0; i < szB; i++) {
6483 UInt bnr = i % 8;
6484 if (bnr == 0) {
6485 if (i != 0)
6486 VG_(printf) ("\n"); // Terminate previous line
6487 VG_(printf) ("%p:", (void*)(address+i));
6489 if (res[i] == 1)
6490 VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
6491 else
6492 VG_(printf) ("\t0x??");
6494 VG_(printf) ("\n"); // Terminate previous line
6498 /* Returns the address of the next non space character,
6499 or address of the string terminator. */
6500 static HChar* next_non_space (HChar *s)
6502 while (*s && *s == ' ')
6503 s++;
6504 return s;
6507 /* Parse an integer slice, i.e. a single integer or a range of integer.
6508 Syntax is:
6509 <integer>[..<integer> ]
6510 (spaces are allowed before and/or after ..).
6511 Return True if range correctly parsed, False otherwise. */
6512 static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
6513 UInt *from, UInt *to)
6515 HChar* wl;
6516 HChar *endptr;
6517 endptr = NULL;////
6518 wl = VG_(strtok_r) (s, " ", saveptr);
6520 /* slice must start with an integer. */
6521 if (wl == NULL) {
6522 VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
6523 return False;
6525 *from = VG_(strtoull10) (wl, &endptr);
6526 if (endptr == wl) {
6527 VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
6528 return False;
6531 if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
6532 /* wl token is an integer terminating the string
6533 or else next token does not start with .
6534 In both cases, the slice is a single integer. */
6535 *to = *from;
6536 return True;
6539 if (*endptr == '\0') {
6540 // iii .. => get the next token
6541 wl = VG_(strtok_r) (NULL, " .", saveptr);
6542 } else {
6543 // It must be iii..
6544 if (*endptr != '.' && *(endptr+1) != '.') {
6545 VG_(gdb_printf) ("expecting slice <from>..<to>\n");
6546 return False;
6548 if ( *(endptr+2) == ' ') {
6549 // It must be iii.. jjj => get the next token
6550 wl = VG_(strtok_r) (NULL, " .", saveptr);
6551 } else {
6552 // It must be iii..jjj
6553 wl = endptr+2;
6557 *to = VG_(strtoull10) (wl, &endptr);
6558 if (*endptr != '\0') {
6559 VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
6560 return False;
6563 if (*from > *to) {
6564 VG_(gdb_printf) ("<from> cannot be bigger than <to> "
6565 "in slice <from>..<to>\n");
6566 return False;
6569 return True;
6572 /* return True if request recognised, False otherwise */
6573 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
6575 HChar* wcmd;
6576 HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
6577 HChar *ssaveptr;
6579 VG_(strcpy) (s, req);
6581 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
6582 /* NB: if possible, avoid introducing a new command below which
6583 starts with the same first letter(s) as an already existing
6584 command. This ensures a shorter abbreviation for the user. */
6585 switch (VG_(keyword_id)
6586 ("help get_vbits leak_check make_memory check_memory "
6587 "block_list who_points_at xb xtmemory",
6588 wcmd, kwd_report_duplicated_matches)) {
6589 case -2: /* multiple matches */
6590 return True;
6591 case -1: /* not found */
6592 return False;
6593 case 0: /* help */
6594 print_monitor_help();
6595 return True;
6596 case 1: { /* get_vbits */
6597 Addr address;
6598 SizeT szB = 1;
6599 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6600 UChar vbits;
6601 Int i;
6602 Int unaddressable = 0;
6603 for (i = 0; i < szB; i++) {
6604 Int res = mc_get_or_set_vbits_for_client
6605 (address+i, (Addr) &vbits, 1,
6606 False, /* get them */
6607 False /* is client request */ );
6608 /* we are before the first character on next line, print a \n. */
6609 if ((i % 32) == 0 && i != 0)
6610 VG_(printf) ("\n");
6611 /* we are before the next block of 4 starts, print a space. */
6612 else if ((i % 4) == 0 && i != 0)
6613 VG_(printf) (" ");
6614 if (res == 1) {
6615 VG_(printf) ("%02x", vbits);
6616 } else {
6617 tl_assert(3 == res);
6618 unaddressable++;
6619 VG_(printf) ("__");
6622 VG_(printf) ("\n");
6623 if (unaddressable) {
6624 VG_(printf)
6625 ("Address %p len %lu has %d bytes unaddressable\n",
6626 (void *)address, szB, unaddressable);
6629 return True;
6631 case 2: { /* leak_check */
6632 Int err = 0;
6633 LeakCheckParams lcp;
6634 HChar* xt_filename = NULL;
6635 HChar* kw;
6637 lcp.mode = LC_Full;
6638 lcp.show_leak_kinds = R2S(Possible) | R2S(Unreached);
6639 lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
6640 lcp.heuristics = 0;
6641 lcp.deltamode = LCD_Increased;
6642 lcp.max_loss_records_output = 999999999;
6643 lcp.requested_by_monitor_command = True;
6644 lcp.xt_filename = NULL;
6646 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
6647 kw != NULL;
6648 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6649 switch (VG_(keyword_id)
6650 ("full summary xtleak "
6651 "kinds reachable possibleleak definiteleak "
6652 "heuristics "
6653 "new increased changed any "
6654 "unlimited limited ",
6655 kw, kwd_report_all)) {
6656 case -2: err++; break;
6657 case -1: err++; break;
6658 case 0: /* full */
6659 lcp.mode = LC_Full; break;
6660 case 1: /* summary */
6661 lcp.mode = LC_Summary; break;
6662 case 2: /* xtleak */
6663 lcp.mode = LC_Full;
6664 xt_filename
6665 = VG_(expand_file_name)("--xtleak-mc_main.c",
6666 "xtleak.kcg.%p.%n");
6667 lcp.xt_filename = xt_filename;
6668 break;
6669 case 3: { /* kinds */
6670 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6671 if (wcmd == NULL
6672 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
6673 True/*allow_all*/,
6674 wcmd,
6675 &lcp.show_leak_kinds)) {
6676 VG_(gdb_printf) ("missing or malformed leak kinds set\n");
6677 err++;
6679 break;
6681 case 4: /* reachable */
6682 lcp.show_leak_kinds = MC_(all_Reachedness)();
6683 break;
6684 case 5: /* possibleleak */
6685 lcp.show_leak_kinds
6686 = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
6687 break;
6688 case 6: /* definiteleak */
6689 lcp.show_leak_kinds = R2S(Unreached);
6690 break;
6691 case 7: { /* heuristics */
6692 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6693 if (wcmd == NULL
6694 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6695 True,/*allow_all*/
6696 wcmd,
6697 &lcp.heuristics)) {
6698 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6699 err++;
6701 break;
6703 case 8: /* new */
6704 lcp.deltamode = LCD_New; break;
6705 case 9: /* increased */
6706 lcp.deltamode = LCD_Increased; break;
6707 case 10: /* changed */
6708 lcp.deltamode = LCD_Changed; break;
6709 case 11: /* any */
6710 lcp.deltamode = LCD_Any; break;
6711 case 12: /* unlimited */
6712 lcp.max_loss_records_output = 999999999; break;
6713 case 13: { /* limited */
6714 Int int_value;
6715 const HChar* endptr;
6717 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6718 if (wcmd == NULL) {
6719 int_value = 0;
6720 endptr = "empty"; /* to report an error below */
6721 } else {
6722 HChar *the_end;
6723 int_value = VG_(strtoll10) (wcmd, &the_end);
6724 endptr = the_end;
6726 if (*endptr != '\0')
6727 VG_(gdb_printf) ("missing or malformed integer value\n");
6728 else if (int_value > 0)
6729 lcp.max_loss_records_output = (UInt) int_value;
6730 else
6731 VG_(gdb_printf) ("max_loss_records_output must be >= 1,"
6732 " got %d\n", int_value);
6733 break;
6735 default:
6736 tl_assert (0);
6739 if (!err)
6740 MC_(detect_memory_leaks)(tid, &lcp);
6741 if (xt_filename != NULL)
6742 VG_(free)(xt_filename);
6743 return True;
6746 case 3: { /* make_memory */
6747 Addr address;
6748 SizeT szB = 1;
6749 Int kwdid = VG_(keyword_id)
6750 ("noaccess undefined defined Definedifaddressable",
6751 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6752 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6753 return True;
6754 switch (kwdid) {
6755 case -2: break;
6756 case -1: break;
6757 case 0: MC_(make_mem_noaccess) (address, szB); break;
6758 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
6759 MC_OKIND_USER ); break;
6760 case 2: MC_(make_mem_defined) ( address, szB ); break;
6761 case 3: make_mem_defined_if_addressable ( address, szB ); break;;
6762 default: tl_assert(0);
6764 return True;
6767 case 4: { /* check_memory */
6768 Addr address;
6769 SizeT szB = 1;
6770 Addr bad_addr;
6771 UInt okind;
6772 const HChar* src;
6773 UInt otag;
6774 UInt ecu;
6775 ExeContext* origin_ec;
6776 MC_ReadResult res;
6778 Int kwdid = VG_(keyword_id)
6779 ("addressable defined",
6780 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6781 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6782 return True;
6783 switch (kwdid) {
6784 case -2: break;
6785 case -1: break;
6786 case 0: /* addressable */
6787 if (is_mem_addressable ( address, szB, &bad_addr ))
6788 VG_(printf) ("Address %p len %lu addressable\n",
6789 (void *)address, szB);
6790 else
6791 VG_(printf)
6792 ("Address %p len %lu not addressable:\nbad address %p\n",
6793 (void *)address, szB, (void *) bad_addr);
6794 // Describe this (probably live) address with current epoch
6795 MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
6796 break;
6797 case 1: /* defined */
6798 res = is_mem_defined ( address, szB, &bad_addr, &otag );
6799 if (MC_AddrErr == res)
6800 VG_(printf)
6801 ("Address %p len %lu not addressable:\nbad address %p\n",
6802 (void *)address, szB, (void *) bad_addr);
6803 else if (MC_ValueErr == res) {
6804 okind = otag & 3;
6805 switch (okind) {
6806 case MC_OKIND_STACK:
6807 src = " was created by a stack allocation"; break;
6808 case MC_OKIND_HEAP:
6809 src = " was created by a heap allocation"; break;
6810 case MC_OKIND_USER:
6811 src = " was created by a client request"; break;
6812 case MC_OKIND_UNKNOWN:
6813 src = ""; break;
6814 default: tl_assert(0);
6816 VG_(printf)
6817 ("Address %p len %lu not defined:\n"
6818 "Uninitialised value at %p%s\n",
6819 (void *)address, szB, (void *) bad_addr, src);
6820 ecu = otag & ~3;
6821 if (VG_(is_plausible_ECU)(ecu)) {
6822 origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
6823 VG_(pp_ExeContext)( origin_ec );
6826 else
6827 VG_(printf) ("Address %p len %lu defined\n",
6828 (void *)address, szB);
6829 // Describe this (probably live) address with current epoch
6830 MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
6831 break;
6832 default: tl_assert(0);
6834 return True;
6837 case 5: { /* block_list */
6838 HChar* wl;
6839 HChar *the_end;
6840 UInt lr_nr_from = 0;
6841 UInt lr_nr_to = 0;
6843 if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
6844 UInt limit_blocks = 999999999;
6845 Int int_value;
6846 UInt heuristics = 0;
6848 for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
6849 wl != NULL;
6850 wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6851 switch (VG_(keyword_id) ("unlimited limited heuristics ",
6852 wl, kwd_report_all)) {
6853 case -2: return True;
6854 case -1: return True;
6855 case 0: /* unlimited */
6856 limit_blocks = 999999999; break;
6857 case 1: /* limited */
6858 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6859 if (wcmd == NULL) {
6860 VG_(gdb_printf) ("missing integer value\n");
6861 return True;
6863 int_value = VG_(strtoll10) (wcmd, &the_end);
6864 if (*the_end != '\0') {
6865 VG_(gdb_printf) ("malformed integer value\n");
6866 return True;
6868 if (int_value <= 0) {
6869 VG_(gdb_printf) ("max_blocks must be >= 1,"
6870 " got %d\n", int_value);
6871 return True;
6873 limit_blocks = (UInt) int_value;
6874 break;
6875 case 2: /* heuristics */
6876 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6877 if (wcmd == NULL
6878 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6879 True,/*allow_all*/
6880 wcmd,
6881 &heuristics)) {
6882 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6883 return True;
6885 break;
6886 default:
6887 tl_assert (0);
6890 /* substract 1 from lr_nr_from/lr_nr_to as what is shown to the user
6891 is 1 more than the index in lr_array. */
6892 if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
6893 lr_nr_to-1,
6894 limit_blocks,
6895 heuristics))
6896 VG_(gdb_printf) ("invalid loss record nr\n");
6898 return True;
6901 case 6: { /* who_points_at */
6902 Addr address;
6903 SizeT szB = 1;
6905 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6906 return True;
6907 if (address == (Addr) 0) {
6908 VG_(gdb_printf) ("Cannot search who points at 0x0\n");
6909 return True;
6911 MC_(who_points_at) (address, szB);
6912 return True;
6915 case 7: { /* xb */
6916 Addr address;
6917 SizeT szB = 1;
6918 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6919 UChar vbits[8];
6920 Int res[8];
6921 Int i;
6922 Int unaddressable = 0;
6923 for (i = 0; i < szB; i++) {
6924 Int bnr = i % 8;
6925 /* We going to print the first vabits of a new line.
6926 Terminate the previous line if needed: prints a line with the
6927 address and the data. */
6928 if (bnr == 0) {
6929 if (i != 0) {
6930 VG_(printf) ("\n");
6931 gdb_xb (address + i - 8, 8, res);
6933 VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
6935 res[bnr] = mc_get_or_set_vbits_for_client
6936 (address+i, (Addr) &vbits[bnr], 1,
6937 False, /* get them */
6938 False /* is client request */ );
6939 if (res[bnr] == 1) {
6940 VG_(printf) ("\t %02x", vbits[bnr]);
6941 } else {
6942 tl_assert(3 == res[bnr]);
6943 unaddressable++;
6944 VG_(printf) ("\t __");
6947 VG_(printf) ("\n");
6948 if (szB % 8 == 0 && szB > 0)
6949 gdb_xb (address + szB - 8, 8, res);
6950 else
6951 gdb_xb (address + szB - szB % 8, szB % 8, res);
6952 if (unaddressable) {
6953 VG_(printf)
6954 ("Address %p len %lu has %d bytes unaddressable\n",
6955 (void *)address, szB, unaddressable);
6958 return True;
6961 case 8: { /* xtmemory */
6962 HChar* filename;
6963 filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
6964 MC_(xtmemory_report)(filename, False);
6965 return True;
6968 default:
6969 tl_assert(0);
6970 return False;
6974 /*------------------------------------------------------------*/
6975 /*--- Client requests ---*/
6976 /*------------------------------------------------------------*/
6978 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
6980 Int i;
6981 Addr bad_addr;
6982 MC_Chunk* mc = NULL;
6984 if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
6985 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
6986 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
6987 && VG_USERREQ__FREELIKE_BLOCK != arg[0]
6988 && VG_USERREQ__CREATE_MEMPOOL != arg[0]
6989 && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
6990 && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
6991 && VG_USERREQ__MEMPOOL_FREE != arg[0]
6992 && VG_USERREQ__MEMPOOL_TRIM != arg[0]
6993 && VG_USERREQ__MOVE_MEMPOOL != arg[0]
6994 && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
6995 && VG_USERREQ__MEMPOOL_EXISTS != arg[0]
6996 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]
6997 && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
6998 && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
6999 return False;
7001 switch (arg[0]) {
7002 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
7003 Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
7004 if (!ok)
7005 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
7006 *ret = ok ? (UWord)NULL : bad_addr;
7007 break;
7010 case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
7011 Bool errorV = False;
7012 Addr bad_addrV = 0;
7013 UInt otagV = 0;
7014 Bool errorA = False;
7015 Addr bad_addrA = 0;
7016 is_mem_defined_comprehensive(
7017 arg[1], arg[2],
7018 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
7020 if (errorV) {
7021 MC_(record_user_error) ( tid, bad_addrV,
7022 /*isAddrErr*/False, otagV );
7024 if (errorA) {
7025 MC_(record_user_error) ( tid, bad_addrA,
7026 /*isAddrErr*/True, 0 );
7028 /* Return the lower of the two erring addresses, if any. */
7029 *ret = 0;
7030 if (errorV && !errorA) {
7031 *ret = bad_addrV;
7033 if (!errorV && errorA) {
7034 *ret = bad_addrA;
7036 if (errorV && errorA) {
7037 *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
7039 break;
7042 case VG_USERREQ__DO_LEAK_CHECK: {
7043 LeakCheckParams lcp;
7045 if (arg[1] == 0)
7046 lcp.mode = LC_Full;
7047 else if (arg[1] == 1)
7048 lcp.mode = LC_Summary;
7049 else {
7050 VG_(message)(Vg_UserMsg,
7051 "Warning: unknown memcheck leak search mode\n");
7052 lcp.mode = LC_Full;
7055 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
7056 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
7057 lcp.heuristics = MC_(clo_leak_check_heuristics);
7059 if (arg[2] == 0)
7060 lcp.deltamode = LCD_Any;
7061 else if (arg[2] == 1)
7062 lcp.deltamode = LCD_Increased;
7063 else if (arg[2] == 2)
7064 lcp.deltamode = LCD_Changed;
7065 else if (arg[2] == 3)
7066 lcp.deltamode = LCD_New;
7067 else {
7068 VG_(message)
7069 (Vg_UserMsg,
7070 "Warning: unknown memcheck leak search deltamode\n");
7071 lcp.deltamode = LCD_Any;
7073 lcp.max_loss_records_output = 999999999;
7074 lcp.requested_by_monitor_command = False;
7075 lcp.xt_filename = NULL;
7077 MC_(detect_memory_leaks)(tid, &lcp);
7078 *ret = 0; /* return value is meaningless */
7079 break;
7082 case VG_USERREQ__MAKE_MEM_NOACCESS:
7083 MC_(make_mem_noaccess) ( arg[1], arg[2] );
7084 *ret = -1;
7085 break;
7087 case VG_USERREQ__MAKE_MEM_UNDEFINED:
7088 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
7089 MC_OKIND_USER );
7090 *ret = -1;
7091 break;
7093 case VG_USERREQ__MAKE_MEM_DEFINED:
7094 MC_(make_mem_defined) ( arg[1], arg[2] );
7095 *ret = -1;
7096 break;
7098 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
7099 make_mem_defined_if_addressable ( arg[1], arg[2] );
7100 *ret = -1;
7101 break;
7103 case VG_USERREQ__CREATE_BLOCK: /* describe a block */
7104 if (arg[1] != 0 && arg[2] != 0) {
7105 i = alloc_client_block();
7106 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
7107 cgbs[i].start = arg[1];
7108 cgbs[i].size = arg[2];
7109 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
7110 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
7111 *ret = i;
7112 } else
7113 *ret = -1;
7114 break;
7116 case VG_USERREQ__DISCARD: /* discard */
7117 if (cgbs == NULL
7118 || arg[2] >= cgb_used ||
7119 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
7120 *ret = 1;
7121 } else {
7122 tl_assert(arg[2] < cgb_used);
7123 cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
7124 VG_(free)(cgbs[arg[2]].desc);
7125 cgb_discards++;
7126 *ret = 0;
7128 break;
7130 case VG_USERREQ__GET_VBITS:
7131 *ret = mc_get_or_set_vbits_for_client
7132 ( arg[1], arg[2], arg[3],
7133 False /* get them */,
7134 True /* is client request */ );
7135 break;
7137 case VG_USERREQ__SET_VBITS:
7138 *ret = mc_get_or_set_vbits_for_client
7139 ( arg[1], arg[2], arg[3],
7140 True /* set them */,
7141 True /* is client request */ );
7142 break;
7144 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
7145 UWord** argp = (UWord**)arg;
7146 // MC_(bytes_leaked) et al were set by the last leak check (or zero
7147 // if no prior leak checks performed).
7148 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
7149 *argp[2] = MC_(bytes_dubious);
7150 *argp[3] = MC_(bytes_reachable);
7151 *argp[4] = MC_(bytes_suppressed);
7152 // there is no argp[5]
7153 //*argp[5] = MC_(bytes_indirect);
7154 // XXX need to make *argp[1-4] defined; currently done in the
7155 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
7156 *ret = 0;
7157 return True;
7159 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
7160 UWord** argp = (UWord**)arg;
7161 // MC_(blocks_leaked) et al were set by the last leak check (or zero
7162 // if no prior leak checks performed).
7163 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
7164 *argp[2] = MC_(blocks_dubious);
7165 *argp[3] = MC_(blocks_reachable);
7166 *argp[4] = MC_(blocks_suppressed);
7167 // there is no argp[5]
7168 //*argp[5] = MC_(blocks_indirect);
7169 // XXX need to make *argp[1-4] defined; currently done in the
7170 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
7171 *ret = 0;
7172 return True;
7174 case VG_USERREQ__MALLOCLIKE_BLOCK: {
7175 Addr p = (Addr)arg[1];
7176 SizeT sizeB = arg[2];
7177 UInt rzB = arg[3];
7178 Bool is_zeroed = (Bool)arg[4];
7180 MC_(new_block) ( tid, p, sizeB, /*ignored*/0U, 0U, is_zeroed,
7181 MC_AllocCustom, MC_(malloc_list) );
7182 if (rzB > 0) {
7183 MC_(make_mem_noaccess) ( p - rzB, rzB);
7184 MC_(make_mem_noaccess) ( p + sizeB, rzB);
7186 return True;
7188 case VG_USERREQ__RESIZEINPLACE_BLOCK: {
7189 Addr p = (Addr)arg[1];
7190 SizeT oldSizeB = arg[2];
7191 SizeT newSizeB = arg[3];
7192 UInt rzB = arg[4];
7194 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
7195 return True;
7197 case VG_USERREQ__FREELIKE_BLOCK: {
7198 Addr p = (Addr)arg[1];
7199 UInt rzB = arg[2];
7201 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
7202 return True;
7205 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
7206 HChar* s = (HChar*)arg[1];
7207 Addr dst = (Addr) arg[2];
7208 Addr src = (Addr) arg[3];
7209 SizeT len = (SizeT)arg[4];
7210 MC_(record_overlap_error)(tid, s, src, dst, len);
7211 return True;
7214 case _VG_USERREQ__MEMCHECK_VERIFY_ALIGNMENT: {
7215 struct AlignedAllocInfo *aligned_alloc_info = (struct AlignedAllocInfo *)arg[1];
7216 tl_assert(aligned_alloc_info);
7218 switch (aligned_alloc_info->alloc_kind) {
7219 case AllocKindMemalign:
7220 // other platforms just ensure it is a power of 2
7221 // ignore Illumos only enforcing multiple of 4 (probably a bug)
7222 if (aligned_alloc_info->orig_alignment == 0U ||
7223 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7224 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be power of 2)" );
7226 // size zero not allowed on all platforms (e.g. Illumos)
7227 if (aligned_alloc_info->size == 0) {
7228 MC_(record_bad_size) ( tid, aligned_alloc_info->size, "memalign()" );
7230 break;
7231 case AllocKindPosixMemalign:
7232 // must be power of 2
7233 // alignment at least sizeof(size_t)
7234 // size of 0 implementation defined
7235 if (aligned_alloc_info->orig_alignment < sizeof(SizeT) ||
7236 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7237 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero, a power of 2 and a multiple of sizeof(void*))" );
7239 if (aligned_alloc_info->size == 0) {
7240 MC_(record_bad_size) ( tid, aligned_alloc_info->size, "posix_memalign()" );
7242 break;
7243 case AllocKindAlignedAlloc:
7244 // must be power of 2
7245 if ((aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7246 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be a power of 2)" );
7248 // size should be integral multiple of alignment
7249 if (aligned_alloc_info->orig_alignment &&
7250 aligned_alloc_info->size % aligned_alloc_info->orig_alignment != 0U) {
7251 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , aligned_alloc_info->size, " (size should be a multiple of alignment)" );
7253 if (aligned_alloc_info->size == 0) {
7254 MC_(record_bad_size) ( tid, aligned_alloc_info->size, "aligned_alloc()" );
7256 break;
7257 case AllocKindDeleteSized:
7258 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7259 if (mc && mc->szB != aligned_alloc_info->size) {
7260 MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new/delete" );
7262 break;
7263 case AllocKindVecDeleteSized:
7264 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7265 if (mc && mc->szB != aligned_alloc_info->size) {
7266 MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new[][/delete[]" );
7268 break;
7269 case AllocKindNewAligned:
7270 if (aligned_alloc_info->orig_alignment == 0 ||
7271 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7272 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7274 break;
7275 case AllocKindVecNewAligned:
7276 if (aligned_alloc_info->orig_alignment == 0 ||
7277 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7278 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7280 break;
7281 case AllocKindDeleteDefault:
7282 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7283 if (mc && mc->alignB) {
7284 MC_(record_align_mismatch_error) ( tid, mc, 0U, True, "new/delete");
7286 break;
7287 case AllocKindDeleteAligned:
7288 if (aligned_alloc_info->orig_alignment == 0 ||
7289 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7290 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7292 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7293 if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
7294 MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, False, "new/delete");
7296 break;
7297 case AllocKindVecDeleteDefault:
7298 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7299 if (mc && mc->alignB) {
7300 MC_(record_align_mismatch_error) ( tid, mc, 0U, True, "new[]/delete[]");
7302 break;
7303 case AllocKindVecDeleteAligned:
7304 if (aligned_alloc_info->orig_alignment == 0 ||
7305 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7306 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7308 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7309 if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
7310 MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, False, "new[]/delete[]");
7312 break;
7313 case AllocKindDeleteSizedAligned:
7314 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7315 if (mc && mc->szB != aligned_alloc_info->size) {
7316 MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new/delete");
7318 if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
7319 MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, False, "new/delete");
7321 if (aligned_alloc_info->orig_alignment == 0 ||
7322 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7323 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7325 break;
7326 case AllocKindVecDeleteSizedAligned:
7327 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7328 if (mc && mc->szB != aligned_alloc_info->size) {
7329 MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new[]/delete[]" );
7331 if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
7332 MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, False, "new[]/delete[]");
7334 if (aligned_alloc_info->orig_alignment == 0 ||
7335 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7336 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7338 break;
7339 default:
7340 tl_assert (False);
7343 return True;
7346 case VG_USERREQ__CREATE_MEMPOOL: {
7347 Addr pool = (Addr)arg[1];
7348 UInt rzB = arg[2];
7349 Bool is_zeroed = (Bool)arg[3];
7350 UInt flags = arg[4];
7352 // The create_mempool function does not know these mempool flags,
7353 // pass as booleans.
7354 MC_(create_mempool) ( pool, rzB, is_zeroed,
7355 (flags & VALGRIND_MEMPOOL_AUTO_FREE),
7356 (flags & VALGRIND_MEMPOOL_METAPOOL) );
7357 return True;
7360 case VG_USERREQ__DESTROY_MEMPOOL: {
7361 Addr pool = (Addr)arg[1];
7363 MC_(destroy_mempool) ( pool );
7364 return True;
7367 case VG_USERREQ__MEMPOOL_ALLOC: {
7368 Addr pool = (Addr)arg[1];
7369 Addr addr = (Addr)arg[2];
7370 UInt size = arg[3];
7372 MC_(mempool_alloc) ( tid, pool, addr, size );
7373 return True;
7376 case VG_USERREQ__MEMPOOL_FREE: {
7377 Addr pool = (Addr)arg[1];
7378 Addr addr = (Addr)arg[2];
7380 MC_(mempool_free) ( pool, addr );
7381 return True;
7384 case VG_USERREQ__MEMPOOL_TRIM: {
7385 Addr pool = (Addr)arg[1];
7386 Addr addr = (Addr)arg[2];
7387 UInt size = arg[3];
7389 MC_(mempool_trim) ( pool, addr, size );
7390 return True;
7393 case VG_USERREQ__MOVE_MEMPOOL: {
7394 Addr poolA = (Addr)arg[1];
7395 Addr poolB = (Addr)arg[2];
7397 MC_(move_mempool) ( poolA, poolB );
7398 return True;
7401 case VG_USERREQ__MEMPOOL_CHANGE: {
7402 Addr pool = (Addr)arg[1];
7403 Addr addrA = (Addr)arg[2];
7404 Addr addrB = (Addr)arg[3];
7405 UInt size = arg[4];
7407 MC_(mempool_change) ( pool, addrA, addrB, size );
7408 return True;
7411 case VG_USERREQ__MEMPOOL_EXISTS: {
7412 Addr pool = (Addr)arg[1];
7414 *ret = (UWord) MC_(mempool_exists) ( pool );
7415 return True;
7418 case VG_USERREQ__GDB_MONITOR_COMMAND: {
7419 Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
7420 if (handled)
7421 *ret = 1;
7422 else
7423 *ret = 0;
7424 return handled;
7427 case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
7428 case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
7429 Bool addRange
7430 = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
7431 Bool ok
7432 = modify_ignore_ranges(addRange, arg[1], arg[2]);
7433 *ret = ok ? 1 : 0;
7434 return True;
7437 default:
7438 VG_(message)(Vg_UserMsg,
7439 "Warning: unknown memcheck client request code %llx\n",
7440 (ULong)arg[0]);
7441 return False;
7443 return True;
7447 /*------------------------------------------------------------*/
7448 /*--- Crude profiling machinery. ---*/
7449 /*------------------------------------------------------------*/
7451 // We track a number of interesting events (using PROF_EVENT)
7452 // if MC_PROFILE_MEMORY is defined.
7454 #ifdef MC_PROFILE_MEMORY
7456 ULong MC_(event_ctr)[MCPE_LAST];
7458 /* Event counter names. Use the name of the function that increases the
7459 event counter. Drop any MC_() and mc_ prefices. */
7460 static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
7461 [MCPE_LOADVN_SLOW] = "LOADVn_slow",
7462 [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
7463 [MCPE_STOREVN_SLOW] = "STOREVn_slow",
7464 [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
7465 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
7466 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
7467 "make_aligned_word32_undefined_slow",
7468 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
7469 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
7470 "make_aligned_word64_undefined_slow",
7471 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
7472 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
7473 "make_aligned_word32_noaccess_slow",
7474 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
7475 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
7476 "make_aligned_word64_noaccess_slow",
7477 [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
7478 [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
7479 [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
7480 [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
7481 [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
7482 [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
7483 [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
7484 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
7485 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
7486 [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
7487 [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
7488 [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
7489 [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
7490 [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
7491 [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
7492 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
7493 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
7494 "is_mem_defined_comprehensive(loop)",
7495 [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
7496 [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
7497 [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
7498 [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
7499 [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
7500 [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
7501 "set_address_range_perms(single-secmap)",
7502 [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
7503 "set_address_range_perms(startof-secmap)",
7504 [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
7505 "set_address_range_perms(multiple-secmaps)",
7506 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
7507 "set_address_range_perms(dist-sm1)",
7508 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
7509 "set_address_range_perms(dist-sm2)",
7510 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
7511 "set_address_range_perms(dist-sm1-quick)",
7512 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
7513 "set_address_range_perms(dist-sm2-quick)",
7514 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
7515 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
7516 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
7517 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
7518 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
7519 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
7520 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
7521 "set_address_range_perms(loop64K-free-dist-sm)",
7522 [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
7523 [MCPE_LOADV_128_OR_256] = "LOADV_128_or_256",
7524 [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
7525 [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
7526 [MCPE_LOADV64] = "LOADV64",
7527 [MCPE_LOADV64_SLOW1] = "LOADV64-slow1",
7528 [MCPE_LOADV64_SLOW2] = "LOADV64-slow2",
7529 [MCPE_STOREV64] = "STOREV64",
7530 [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
7531 [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
7532 [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
7533 [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
7534 [MCPE_LOADV32] = "LOADV32",
7535 [MCPE_LOADV32_SLOW1] = "LOADV32-slow1",
7536 [MCPE_LOADV32_SLOW2] = "LOADV32-slow2",
7537 [MCPE_STOREV32] = "STOREV32",
7538 [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
7539 [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
7540 [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
7541 [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
7542 [MCPE_LOADV16] = "LOADV16",
7543 [MCPE_LOADV16_SLOW1] = "LOADV16-slow1",
7544 [MCPE_LOADV16_SLOW2] = "LOADV16-slow2",
7545 [MCPE_STOREV16] = "STOREV16",
7546 [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
7547 [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
7548 [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
7549 [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
7550 [MCPE_LOADV8] = "LOADV8",
7551 [MCPE_LOADV8_SLOW1] = "LOADV8-slow1",
7552 [MCPE_LOADV8_SLOW2] = "LOADV8-slow2",
7553 [MCPE_STOREV8] = "STOREV8",
7554 [MCPE_STOREV8_SLOW1] = "STOREV8-slow1",
7555 [MCPE_STOREV8_SLOW2] = "STOREV8-slow2",
7556 [MCPE_STOREV8_SLOW3] = "STOREV8-slow3",
7557 [MCPE_STOREV8_SLOW4] = "STOREV8-slow4",
7558 [MCPE_NEW_MEM_STACK_4] = "new_mem_stack_4",
7559 [MCPE_NEW_MEM_STACK_8] = "new_mem_stack_8",
7560 [MCPE_NEW_MEM_STACK_12] = "new_mem_stack_12",
7561 [MCPE_NEW_MEM_STACK_16] = "new_mem_stack_16",
7562 [MCPE_NEW_MEM_STACK_32] = "new_mem_stack_32",
7563 [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
7564 [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
7565 [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
7566 [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
7567 [MCPE_DIE_MEM_STACK_4] = "die_mem_stack_4",
7568 [MCPE_DIE_MEM_STACK_8] = "die_mem_stack_8",
7569 [MCPE_DIE_MEM_STACK_12] = "die_mem_stack_12",
7570 [MCPE_DIE_MEM_STACK_16] = "die_mem_stack_16",
7571 [MCPE_DIE_MEM_STACK_32] = "die_mem_stack_32",
7572 [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
7573 [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
7574 [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
7575 [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
7576 [MCPE_NEW_MEM_STACK] = "new_mem_stack",
7577 [MCPE_DIE_MEM_STACK] = "die_mem_stack",
7578 [MCPE_MAKE_STACK_UNINIT_W_O] = "MAKE_STACK_UNINIT_w_o",
7579 [MCPE_MAKE_STACK_UNINIT_NO_O] = "MAKE_STACK_UNINIT_no_o",
7580 [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
7581 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
7582 = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
7583 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
7584 = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
7585 [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
7586 = "MAKE_STACK_UNINIT_128_no_o_slowcase",
7589 static void init_prof_mem ( void )
7591 Int i, name_count = 0;
7593 for (i = 0; i < MCPE_LAST; i++) {
7594 MC_(event_ctr)[i] = 0;
7595 if (MC_(event_ctr_name)[i] != NULL)
7596 ++name_count;
7599 /* Make sure every profiling event has a name */
7600 tl_assert(name_count == MCPE_LAST);
7603 static void done_prof_mem ( void )
7605 Int i, n;
7606 Bool spaced = False;
7607 for (i = n = 0; i < MCPE_LAST; i++) {
7608 if (!spaced && (n % 10) == 0) {
7609 VG_(printf)("\n");
7610 spaced = True;
7612 if (MC_(event_ctr)[i] > 0) {
7613 spaced = False;
7614 ++n;
7615 VG_(printf)( "prof mem event %3d: %11llu %s\n",
7616 i, MC_(event_ctr)[i],
7617 MC_(event_ctr_name)[i]);
7622 #else
/* MC_PROFILE_MEMORY is not defined: the profiling hooks are no-ops. */
static void init_prof_mem ( void )
{
}

static void done_prof_mem ( void )
{
}
7627 #endif
7630 /*------------------------------------------------------------*/
7631 /*--- Origin tracking stuff ---*/
7632 /*------------------------------------------------------------*/
7634 /*--------------------------------------------*/
7635 /*--- Origin tracking: load handlers ---*/
7636 /*--------------------------------------------*/
7638 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
7639 return or1 > or2 ? or1 : or2;
7642 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
7643 OCacheLine* line;
7644 UChar descr;
7645 UWord lineoff = oc_line_offset(a);
7646 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7648 if (OC_ENABLE_ASSERTIONS) {
7649 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7652 line = find_OCacheLine( a );
7654 descr = line->u.main.descr[lineoff];
7655 if (OC_ENABLE_ASSERTIONS) {
7656 tl_assert(descr < 0x10);
7659 if (LIKELY(0 == (descr & (1 << byteoff)))) {
7660 return 0;
7661 } else {
7662 return line->u.main.w32[lineoff];
7666 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
7667 OCacheLine* line;
7668 UChar descr;
7669 UWord lineoff, byteoff;
7671 if (UNLIKELY(a & 1)) {
7672 /* Handle misaligned case, slowly. */
7673 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
7674 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
7675 return merge_origins(oLo, oHi);
7678 lineoff = oc_line_offset(a);
7679 byteoff = a & 3; /* 0 or 2 */
7681 if (OC_ENABLE_ASSERTIONS) {
7682 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7684 line = find_OCacheLine( a );
7686 descr = line->u.main.descr[lineoff];
7687 if (OC_ENABLE_ASSERTIONS) {
7688 tl_assert(descr < 0x10);
7691 if (LIKELY(0 == (descr & (3 << byteoff)))) {
7692 return 0;
7693 } else {
7694 return line->u.main.w32[lineoff];
7698 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
7699 OCacheLine* line;
7700 UChar descr;
7701 UWord lineoff;
7703 if (UNLIKELY(a & 3)) {
7704 /* Handle misaligned case, slowly. */
7705 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
7706 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
7707 return merge_origins(oLo, oHi);
7710 lineoff = oc_line_offset(a);
7711 if (OC_ENABLE_ASSERTIONS) {
7712 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7715 line = find_OCacheLine( a );
7717 descr = line->u.main.descr[lineoff];
7718 if (OC_ENABLE_ASSERTIONS) {
7719 tl_assert(descr < 0x10);
7722 if (LIKELY(0 == descr)) {
7723 return 0;
7724 } else {
7725 return line->u.main.w32[lineoff];
7729 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
7730 OCacheLine* line;
7731 UChar descrLo, descrHi, descr;
7732 UWord lineoff;
7734 if (UNLIKELY(a & 7)) {
7735 /* Handle misaligned case, slowly. */
7736 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
7737 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
7738 return merge_origins(oLo, oHi);
7741 lineoff = oc_line_offset(a);
7742 if (OC_ENABLE_ASSERTIONS) {
7743 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7746 line = find_OCacheLine( a );
7748 descrLo = line->u.main.descr[lineoff + 0];
7749 descrHi = line->u.main.descr[lineoff + 1];
7750 descr = descrLo | descrHi;
7751 if (OC_ENABLE_ASSERTIONS) {
7752 tl_assert(descr < 0x10);
7755 if (LIKELY(0 == descr)) {
7756 return 0; /* both 32-bit chunks are defined */
7757 } else {
7758 UInt oLo = descrLo == 0 ? 0 : line->u.main.w32[lineoff + 0];
7759 UInt oHi = descrHi == 0 ? 0 : line->u.main.w32[lineoff + 1];
7760 return merge_origins(oLo, oHi);
7764 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
7765 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 );
7766 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 );
7767 UInt oBoth = merge_origins(oLo, oHi);
7768 return (UWord)oBoth;
7771 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
7772 UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 );
7773 UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 );
7774 UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 );
7775 UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 );
7776 UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
7777 merge_origins(oQ2, oQ3));
7778 return (UWord)oAll;
7782 /*--------------------------------------------*/
7783 /*--- Origin tracking: store handlers ---*/
7784 /*--------------------------------------------*/
7786 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
7787 OCacheLine* line;
7788 UWord lineoff = oc_line_offset(a);
7789 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7791 if (OC_ENABLE_ASSERTIONS) {
7792 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7795 line = find_OCacheLine( a );
7797 #if OC_PRECISION_STORE
7798 if (LIKELY(d32 == 0)) {
7799 // The byte is defined. Just mark it as so in the descr and leave the w32
7800 // unchanged. This may make the descr become zero, so the line no longer
7801 // contains useful info, but that's OK. No loss of information.
7802 line->u.main.descr[lineoff] &= ~(1 << byteoff);
7803 } else if (d32 == line->u.main.w32[lineoff]) {
7804 // At least one of the four bytes in the w32 is undefined with the same
7805 // origin. Just extend the mask. No loss of information.
7806 line->u.main.descr[lineoff] |= (1 << byteoff);
7807 } else {
7808 // Here, we have a conflict: at least one byte in the group is undefined
7809 // but with some other origin. We can't represent both origins, so we
7810 // forget about the previous origin and install this one instead.
7811 line->u.main.descr[lineoff] = (1 << byteoff);
7812 line->u.main.w32[lineoff] = d32;
7814 #else
7815 if (d32 == 0) {
7816 line->u.main.descr[lineoff] &= ~(1 << byteoff);
7817 } else {
7818 line->u.main.descr[lineoff] |= (1 << byteoff);
7819 line->u.main.w32[lineoff] = d32;
7821 #endif
7824 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
7825 OCacheLine* line;
7826 UWord lineoff, byteoff;
7828 if (UNLIKELY(a & 1)) {
7829 /* Handle misaligned case, slowly. */
7830 MC_(helperc_b_store1)( a + 0, d32 );
7831 MC_(helperc_b_store1)( a + 1, d32 );
7832 return;
7835 lineoff = oc_line_offset(a);
7836 byteoff = a & 3; /* 0 or 2 */
7838 if (OC_ENABLE_ASSERTIONS) {
7839 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7842 line = find_OCacheLine( a );
7844 #if OC_PRECISION_STORE
7845 // Same logic as in the store1 case above.
7846 if (LIKELY(d32 == 0)) {
7847 line->u.main.descr[lineoff] &= ~(3 << byteoff);
7848 } else if (d32 == line->u.main.w32[lineoff]) {
7849 line->u.main.descr[lineoff] |= (3 << byteoff);
7850 line->u.main.w32[lineoff] = d32;
7851 } else {
7852 line->u.main.descr[lineoff] = (3 << byteoff);
7853 line->u.main.w32[lineoff] = d32;
7855 #else
7856 if (d32 == 0) {
7857 line->u.main.descr[lineoff] &= ~(3 << byteoff);
7858 } else {
7859 line->u.main.descr[lineoff] |= (3 << byteoff);
7860 line->u.main.w32[lineoff] = d32;
7862 #endif
7865 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
7866 OCacheLine* line;
7867 UWord lineoff;
7869 if (UNLIKELY(a & 3)) {
7870 /* Handle misaligned case, slowly. */
7871 MC_(helperc_b_store2)( a + 0, d32 );
7872 MC_(helperc_b_store2)( a + 2, d32 );
7873 return;
7876 lineoff = oc_line_offset(a);
7877 if (OC_ENABLE_ASSERTIONS) {
7878 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7881 line = find_OCacheLine( a );
7883 if (d32 == 0) {
7884 line->u.main.descr[lineoff] = 0;
7885 } else {
7886 line->u.main.descr[lineoff] = 0xF;
7887 line->u.main.w32[lineoff] = d32;
7891 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
7892 STATIC_ASSERT(OC_W32S_PER_LINE == 8);
7893 OCacheLine* line;
7894 UWord lineoff;
7896 if (UNLIKELY(a & 7)) {
7897 /* Handle misaligned case, slowly. */
7898 MC_(helperc_b_store4)( a + 0, d32 );
7899 MC_(helperc_b_store4)( a + 4, d32 );
7900 return;
7903 lineoff = oc_line_offset(a);
7904 if (OC_ENABLE_ASSERTIONS) {
7905 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7908 line = find_OCacheLine( a );
7910 if (d32 == 0) {
7911 line->u.main.descr[lineoff + 0] = 0;
7912 line->u.main.descr[lineoff + 1] = 0;
7913 } else {
7914 line->u.main.descr[lineoff + 0] = 0xF;
7915 line->u.main.descr[lineoff + 1] = 0xF;
7916 line->u.main.w32[lineoff + 0] = d32;
7917 line->u.main.w32[lineoff + 1] = d32;
7921 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
7922 STATIC_ASSERT(OC_W32S_PER_LINE == 8);
7923 OCacheLine* line;
7924 UWord lineoff;
7926 if (UNLIKELY(a & 15)) {
7927 /* Handle misaligned case, slowly. */
7928 MC_(helperc_b_store8)( a + 0, d32 );
7929 MC_(helperc_b_store8)( a + 8, d32 );
7930 return;
7933 lineoff = oc_line_offset(a);
7934 if (OC_ENABLE_ASSERTIONS) {
7935 tl_assert(lineoff == (lineoff & 4)); /*0,4*//*since 16-aligned*/
7938 line = find_OCacheLine( a );
7940 if (d32 == 0) {
7941 line->u.main.descr[lineoff + 0] = 0;
7942 line->u.main.descr[lineoff + 1] = 0;
7943 line->u.main.descr[lineoff + 2] = 0;
7944 line->u.main.descr[lineoff + 3] = 0;
7945 } else {
7946 line->u.main.descr[lineoff + 0] = 0xF;
7947 line->u.main.descr[lineoff + 1] = 0xF;
7948 line->u.main.descr[lineoff + 2] = 0xF;
7949 line->u.main.descr[lineoff + 3] = 0xF;
7950 line->u.main.w32[lineoff + 0] = d32;
7951 line->u.main.w32[lineoff + 1] = d32;
7952 line->u.main.w32[lineoff + 2] = d32;
7953 line->u.main.w32[lineoff + 3] = d32;
7957 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
7958 STATIC_ASSERT(OC_W32S_PER_LINE == 8);
7959 OCacheLine* line;
7960 UWord lineoff;
7962 if (UNLIKELY(a & 31)) {
7963 /* Handle misaligned case, slowly. */
7964 MC_(helperc_b_store16)( a + 0, d32 );
7965 MC_(helperc_b_store16)( a + 16, d32 );
7966 return;
7969 lineoff = oc_line_offset(a);
7970 if (OC_ENABLE_ASSERTIONS) {
7971 tl_assert(lineoff == 0);
7974 line = find_OCacheLine( a );
7976 if (d32 == 0) {
7977 line->u.main.descr[0] = 0;
7978 line->u.main.descr[1] = 0;
7979 line->u.main.descr[2] = 0;
7980 line->u.main.descr[3] = 0;
7981 line->u.main.descr[4] = 0;
7982 line->u.main.descr[5] = 0;
7983 line->u.main.descr[6] = 0;
7984 line->u.main.descr[7] = 0;
7985 } else {
7986 line->u.main.descr[0] = 0xF;
7987 line->u.main.descr[1] = 0xF;
7988 line->u.main.descr[2] = 0xF;
7989 line->u.main.descr[3] = 0xF;
7990 line->u.main.descr[4] = 0xF;
7991 line->u.main.descr[5] = 0xF;
7992 line->u.main.descr[6] = 0xF;
7993 line->u.main.descr[7] = 0xF;
7994 line->u.main.w32[0] = d32;
7995 line->u.main.w32[1] = d32;
7996 line->u.main.w32[2] = d32;
7997 line->u.main.w32[3] = d32;
7998 line->u.main.w32[4] = d32;
7999 line->u.main.w32[5] = d32;
8000 line->u.main.w32[6] = d32;
8001 line->u.main.w32[7] = d32;
8006 /*--------------------------------------------*/
8007 /*--- Origin tracking: sarp handlers ---*/
8008 /*--------------------------------------------*/
8010 // We may get asked to do very large SARPs (bug 446103), hence it is important
8011 // to process 32-byte chunks at a time when possible.
8013 __attribute__((noinline))
8014 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
8015 if ((a & 1) && len >= 1) {
8016 MC_(helperc_b_store1)( a, otag );
8017 a++;
8018 len--;
8020 if ((a & 2) && len >= 2) {
8021 MC_(helperc_b_store2)( a, otag );
8022 a += 2;
8023 len -= 2;
8025 if ((a & 4) && len >= 4) {
8026 MC_(helperc_b_store4)( a, otag );
8027 a += 4;
8028 len -= 4;
8030 if ((a & 8) && len >= 8) {
8031 MC_(helperc_b_store8)( a, otag );
8032 a += 8;
8033 len -= 8;
8035 if ((a & 16) && len >= 16) {
8036 MC_(helperc_b_store16)( a, otag );
8037 a += 16;
8038 len -= 16;
8040 if (len >= 32) {
8041 tl_assert(0 == (a & 31));
8042 while (len >= 32) {
8043 MC_(helperc_b_store32)( a, otag );
8044 a += 32;
8045 len -= 32;
8048 if (len >= 16) {
8049 MC_(helperc_b_store16)( a, otag );
8050 a += 16;
8051 len -= 16;
8053 if (len >= 8) {
8054 MC_(helperc_b_store8)( a, otag );
8055 a += 8;
8056 len -= 8;
8058 if (len >= 4) {
8059 MC_(helperc_b_store4)( a, otag );
8060 a += 4;
8061 len -= 4;
8063 if (len >= 2) {
8064 MC_(helperc_b_store2)( a, otag );
8065 a += 2;
8066 len -= 2;
8068 if (len >= 1) {
8069 MC_(helperc_b_store1)( a, otag );
8070 //a++;
8071 len--;
8073 tl_assert(len == 0);
8076 __attribute__((noinline))
8077 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
8078 if ((a & 1) && len >= 1) {
8079 MC_(helperc_b_store1)( a, 0 );
8080 a++;
8081 len--;
8083 if ((a & 2) && len >= 2) {
8084 MC_(helperc_b_store2)( a, 0 );
8085 a += 2;
8086 len -= 2;
8088 if ((a & 4) && len >= 4) {
8089 MC_(helperc_b_store4)( a, 0 );
8090 a += 4;
8091 len -= 4;
8093 if ((a & 8) && len >= 8) {
8094 MC_(helperc_b_store8)( a, 0 );
8095 a += 8;
8096 len -= 8;
8098 if ((a & 16) && len >= 16) {
8099 MC_(helperc_b_store16)( a, 0 );
8100 a += 16;
8101 len -= 16;
8103 if (len >= 32) {
8104 tl_assert(0 == (a & 31));
8105 while (len >= 32) {
8106 MC_(helperc_b_store32)( a, 0 );
8107 a += 32;
8108 len -= 32;
8111 if (len >= 16) {
8112 MC_(helperc_b_store16)( a, 0 );
8113 a += 16;
8114 len -= 16;
8116 if (len >= 8) {
8117 MC_(helperc_b_store8)( a, 0 );
8118 a += 8;
8119 len -= 8;
8121 if (len >= 4) {
8122 MC_(helperc_b_store4)( a, 0 );
8123 a += 4;
8124 len -= 4;
8126 if (len >= 2) {
8127 MC_(helperc_b_store2)( a, 0 );
8128 a += 2;
8129 len -= 2;
8131 if (len >= 1) {
8132 MC_(helperc_b_store1)( a, 0 );
8133 //a++;
8134 len--;
8136 tl_assert(len == 0);
8140 /*------------------------------------------------------------*/
8141 /*--- Setup and finalisation ---*/
8142 /*------------------------------------------------------------*/
8144 static void mc_post_clo_init ( void )
8146 /* If we've been asked to emit XML, mash around various other
8147 options so as to constrain the output somewhat. */
8148 if (VG_(clo_xml)) {
8149 /* Extract as much info as possible from the leak checker. */
8150 MC_(clo_leak_check) = LC_Full;
8153 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)
8154 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8155 VG_(message)(Vg_UserMsg,
8156 "Warning: --freelist-big-blocks value %lld has no effect\n"
8157 "as it is >= to --freelist-vol value %lld\n",
8158 MC_(clo_freelist_big_blocks),
8159 MC_(clo_freelist_vol));
8162 if (MC_(clo_workaround_gcc296_bugs)
8163 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8164 VG_(umsg)(
8165 "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
8166 "Warning: Instead use: --ignore-range-below-sp=1024-1\n"
8167 "\n"
8171 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
8173 if (MC_(clo_mc_level) == 3) {
8174 /* We're doing origin tracking. */
8175 # ifdef PERF_FAST_STACK
8176 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU );
8177 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU );
8178 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU );
8179 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU );
8180 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU );
8181 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
8182 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
8183 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
8184 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
8185 # endif
8186 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU );
8187 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_make_ECU );
8188 } else {
8189 /* Not doing origin tracking */
8190 # ifdef PERF_FAST_STACK
8191 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
8192 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 );
8193 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 );
8194 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 );
8195 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 );
8196 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
8197 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
8198 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
8199 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
8200 # endif
8201 VG_(track_new_mem_stack) ( mc_new_mem_stack );
8202 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
8205 // We assume that brk()/sbrk() does not initialise new memory. Is this
8206 // accurate? John Reiser says:
8208 // 0) sbrk() can *decrease* process address space. No zero fill is done
8209 // for a decrease, not even the fragment on the high end of the last page
8210 // that is beyond the new highest address. For maximum safety and
8211 // portability, then the bytes in the last page that reside above [the
8212 // new] sbrk(0) should be considered to be uninitialized, but in practice
8213 // it is exceedingly likely that they will retain their previous
8214 // contents.
8216 // 1) If an increase is large enough to require new whole pages, then
8217 // those new whole pages (like all new pages) are zero-filled by the
8218 // operating system. So if sbrk(0) already is page aligned, then
8219 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
8221 // 2) Any increase that lies within an existing allocated page is not
8222 // changed. So if (x = sbrk(0)) is not page aligned, then
8223 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
8224 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
8225 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
8226 // of them come along for the ride because the operating system deals
8227 // only in whole pages. Again, for maximum safety and portability, then
8228 // anything that lives above [the new] sbrk(0) should be considered
8229 // uninitialized, but in practice will retain previous contents [zero in
8230 // this case.]"
8232 // In short:
8234 // A key property of sbrk/brk is that new whole pages that are supplied
8235 // by the operating system *do* get initialized to zero.
8237 // As for the portability of all this:
8239 // sbrk and brk are not POSIX. However, any system that is a derivative
8240 // of *nix has sbrk and brk because there are too many software (such as
8241 // the Bourne shell) which rely on the traditional memory map (.text,
8242 // .data+.bss, stack) and the existence of sbrk/brk.
8244 // So we should arguably observe all this. However:
8245 // - The current inaccuracy has caused maybe one complaint in seven years(?)
8246 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
8247 // doubt most programmers know the above information.
8248 // So I'm not terribly unhappy with marking it as undefined. --njn.
8250 // [More: I think most of what John said only applies to sbrk(). It seems
8251 // that brk() always deals in whole pages. And since this event deals
8252 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
8253 // just mark all memory it allocates as defined.]
8255 # if !defined(VGO_solaris)
8256 if (MC_(clo_mc_level) == 3)
8257 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_make_ECU );
8258 else
8259 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_no_ECU );
8260 # else
8261 // On Solaris, brk memory has to be marked as defined, otherwise we get
8262 // many false positives.
8263 VG_(track_new_mem_brk) ( make_mem_defined_w_tid );
8264 # endif
8266 /* This origin tracking cache is huge (~100M), so only initialise
8267 if we need it. */
8268 if (MC_(clo_mc_level) >= 3) {
8269 init_OCache();
8270 tl_assert(ocacheL1 != NULL);
8271 for (UInt i = 0; i < 4096; i++ ) {
8272 tl_assert(ocachesL2[i] != NULL);
8274 } else {
8275 tl_assert(ocacheL1 == NULL);
8276 for (UInt i = 0; i < 4096; i++ ) {
8277 tl_assert(ocachesL2[i] == NULL);
8281 MC_(chunk_poolalloc) = VG_(newPA)
8282 (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
8283 1000,
8284 VG_(malloc),
8285 "mc.cMC.1 (MC_Chunk pools)",
8286 VG_(free));
8288 /* Do not check definedness of guest state if --undef-value-errors=no */
8289 if (MC_(clo_mc_level) >= 2)
8290 VG_(track_pre_reg_read) ( mc_pre_reg_read );
8292 if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
8293 if (MC_(clo_keep_stacktraces) == KS_none
8294 || MC_(clo_keep_stacktraces) == KS_free)
8295 VG_(fmsg_bad_option)("--keep-stacktraces",
8296 "To use --xtree-memory=full, you must"
8297 " keep at least the alloc stacktrace\n");
8298 // Activate full xtree memory profiling.
8299 VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main));
8304 static void print_SM_info(const HChar* type, Int n_SMs)
8306 VG_(message)(Vg_DebugMsg,
8307 " memcheck: SMs: %s = %d (%luk, %luM)\n",
8308 type,
8309 n_SMs,
8310 n_SMs * sizeof(SecMap) / 1024UL,
8311 n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
8314 static void mc_print_stats (void)
8316 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
8318 VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
8319 VG_(free_queue_volume), VG_(free_queue_length));
8320 VG_(message)(Vg_DebugMsg,
8321 " memcheck: sanity checks: %d cheap, %d expensive\n",
8322 n_sanity_cheap, n_sanity_expensive );
8323 VG_(message)(Vg_DebugMsg,
8324 " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
8325 n_auxmap_L2_nodes,
8326 n_auxmap_L2_nodes * 64,
8327 n_auxmap_L2_nodes / 16 );
8328 VG_(message)(Vg_DebugMsg,
8329 " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
8330 n_auxmap_L1_searches, n_auxmap_L1_cmps,
8331 (10ULL * n_auxmap_L1_cmps)
8332 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
8334 VG_(message)(Vg_DebugMsg,
8335 " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
8336 n_auxmap_L2_searches, n_auxmap_L2_nodes
8339 print_SM_info("n_issued ", n_issued_SMs);
8340 print_SM_info("n_deissued ", n_deissued_SMs);
8341 print_SM_info("max_noaccess ", max_noaccess_SMs);
8342 print_SM_info("max_undefined", max_undefined_SMs);
8343 print_SM_info("max_defined ", max_defined_SMs);
8344 print_SM_info("max_non_DSM ", max_non_DSM_SMs);
8346 // Three DSMs, plus the non-DSM ones
8347 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
8348 // The 3*sizeof(Word) bytes is the AVL node metadata size.
8349 // The VG_ROUNDUP is because the OSet pool allocator will/must align
8350 // the elements on pointer size.
8351 // Note that the pool allocator has some additional small overhead
8352 // which is not counted in the below.
8353 // Hardwiring this logic sucks, but I don't see how else to do it.
8354 max_secVBit_szB = max_secVBit_nodes *
8355 (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
8356 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
8358 VG_(message)(Vg_DebugMsg,
8359 " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
8360 max_secVBit_nodes, max_secVBit_szB / 1024,
8361 max_secVBit_szB / (1024 * 1024));
8362 VG_(message)(Vg_DebugMsg,
8363 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
8364 sec_vbits_new_nodes + sec_vbits_updates,
8365 sec_vbits_new_nodes, sec_vbits_updates );
8366 VG_(message)(Vg_DebugMsg,
8367 " memcheck: max shadow mem size: %luk, %luM\n",
8368 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
8370 if (MC_(clo_mc_level) >= 3) {
8371 VG_(message)(Vg_DebugMsg,
8372 " ocacheL1: %'14lu refs %'14lu misses (%'lu lossage)\n",
8373 stats_ocacheL1_find,
8374 stats_ocacheL1_misses,
8375 stats_ocacheL1_lossage );
8376 VG_(message)(Vg_DebugMsg,
8377 " ocacheL1: %'14lu at 0 %'14lu at 1\n",
8378 stats_ocacheL1_find - stats_ocacheL1_misses
8379 - stats_ocacheL1_found_at_1
8380 - stats_ocacheL1_found_at_N,
8381 stats_ocacheL1_found_at_1 );
8382 VG_(message)(Vg_DebugMsg,
8383 " ocacheL1: %'14lu at 2+ %'14lu move-fwds\n",
8384 stats_ocacheL1_found_at_N,
8385 stats_ocacheL1_movefwds );
8386 VG_(message)(Vg_DebugMsg,
8387 " ocacheL1: %'14lu sizeB %'14d useful\n",
8388 (SizeT)sizeof(OCache),
8389 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
8390 VG_(message)(Vg_DebugMsg,
8391 " ocacheL2: %'14lu finds %'14lu misses\n",
8392 stats__ocacheL2_finds,
8393 stats__ocacheL2_misses );
8394 VG_(message)(Vg_DebugMsg,
8395 " ocacheL2: %'14lu adds %'14lu dels\n",
8396 stats__ocacheL2_adds,
8397 stats__ocacheL2_dels );
8398 VG_(message)(Vg_DebugMsg,
8399 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
8400 stats__ocacheL2_n_nodes_max,
8401 stats__ocacheL2_n_nodes );
8402 VG_(message)(Vg_DebugMsg,
8403 " niacache: %'12lu refs %'12lu misses\n",
8404 stats__nia_cache_queries, stats__nia_cache_misses);
8405 } else {
8406 tl_assert(ocacheL1 == NULL);
8407 for (UInt i = 0; i < 4096; i++ ) {
8408 tl_assert(ocachesL2[1] == NULL);
8414 static void mc_fini ( Int exitcode )
8416 MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True);
8417 MC_(print_malloc_stats)();
8419 if (MC_(clo_leak_check) != LC_Off) {
8420 LeakCheckParams lcp;
8421 HChar* xt_filename = NULL;
8422 lcp.mode = MC_(clo_leak_check);
8423 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
8424 lcp.heuristics = MC_(clo_leak_check_heuristics);
8425 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
8426 lcp.deltamode = LCD_Any;
8427 lcp.max_loss_records_output = 999999999;
8428 lcp.requested_by_monitor_command = False;
8429 if (MC_(clo_xtree_leak)) {
8430 xt_filename = VG_(expand_file_name)("--xtree-leak-file",
8431 MC_(clo_xtree_leak_file));
8432 lcp.xt_filename = xt_filename;
8433 lcp.mode = LC_Full;
8434 lcp.show_leak_kinds = MC_(all_Reachedness)();
8436 else
8437 lcp.xt_filename = NULL;
8438 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
8439 if (MC_(clo_xtree_leak))
8440 VG_(free)(xt_filename);
8441 } else {
8442 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8443 VG_(umsg)(
8444 "For a detailed leak analysis, rerun with: --leak-check=full\n"
8445 "\n"
8450 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
8451 && MC_(clo_mc_level) == 2) {
8452 VG_(message)(Vg_UserMsg,
8453 "Use --track-origins=yes to see where "
8454 "uninitialised values come from\n");
8457 /* Print a warning if any client-request generated ignore-ranges
8458 still exist. It would be reasonable to expect that a properly
8459 written program would remove any such ranges before exiting, and
8460 since they are a bit on the dangerous side, let's comment. By
8461 contrast ranges which are specified on the command line normally
8462 pertain to hardware mapped into the address space, and so we
8463 can't expect the client to have got rid of them. */
8464 if (gIgnoredAddressRanges) {
8465 UInt i, nBad = 0;
8466 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
8467 UWord val = IAR_INVALID;
8468 UWord key_min = ~(UWord)0;
8469 UWord key_max = (UWord)0;
8470 VG_(indexRangeMap)( &key_min, &key_max, &val,
8471 gIgnoredAddressRanges, i );
8472 if (val != IAR_ClientReq)
8473 continue;
8474 /* Print the offending range. Also, if it is the first,
8475 print a banner before it. */
8476 nBad++;
8477 if (nBad == 1) {
8478 VG_(umsg)(
8479 "WARNING: exiting program has the following client-requested\n"
8480 "WARNING: address error disablement range(s) still in force,\n"
8481 "WARNING: "
8482 "possibly as a result of some mistake in the use of the\n"
8483 "WARNING: "
8484 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
8487 VG_(umsg)(" [%u] 0x%016lx-0x%016lx %s\n",
8488 i, key_min, key_max, showIARKind(val));
8492 done_prof_mem();
8494 if (VG_(clo_stats))
8495 mc_print_stats();
8497 if (0) {
8498 VG_(message)(Vg_DebugMsg,
8499 "------ Valgrind's client block stats follow ---------------\n" );
8500 show_client_block_stats();
8504 /* mark the given addr/len unaddressable for watchpoint implementation
8505 The PointKind will be handled at access time */
8506 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
8507 Addr addr, SizeT len)
8509 /* GDBTD this is somewhat fishy. We might rather have to save the previous
8510 accessibility and definedness in gdbserver so as to allow restoring it
8511 properly. Currently, we assume that the user only watches things
8512 which are properly addressable and defined */
8513 if (insert)
8514 MC_(make_mem_noaccess) (addr, len);
8515 else
8516 MC_(make_mem_defined) (addr, len);
8517 return True;
/* Tool initialisation that runs before the command line has been
   processed (the comments further down state that the command-line
   arguments have not been parsed at this point).  In order, it:
     - sets the tool's name, version, description, copyright and
       bug-report details;
     - registers mc_post_clo_init, MC_(instrument) and mc_fini with the
       core;
     - declares what Memcheck needs from the core: error reporting,
       freeres handling, command-line options, client requests, sanity
       checks, statistics printing, address description, malloc
       replacement, XML output and watchpoints;
     - registers memory and register event callbacks, leaving the
       new_mem_stack family to mc_post_clo_init;
     - initialises shadow memory, the malloc and mempool hash tables,
       memory profiling and the nia-to-ECU cache;
     - asserts a set of word-size and address-mask invariants and runs
       the instrumentation start-up checks.
   Takes no arguments and returns nothing. */
8520 static void mc_pre_clo_init(void)
8522 VG_(details_name) ("Memcheck");
8523 VG_(details_version) (NULL);
8524 VG_(details_description) ("a memory error detector");
8525 VG_(details_copyright_author)(
8526 "Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.");
8527 VG_(details_bug_reports_to) (VG_BUGS_TO);
8528 VG_(details_avg_translation_sizeB) ( 640 );
/* Hand the core the post-option initialiser, the instrumenter and the
   finaliser. */
8530 VG_(basic_tool_funcs) (mc_post_clo_init,
8531 MC_(instrument),
8532 mc_fini);
8534 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) );
/* Error reporting: core errors plus Memcheck's own error and
   suppression handlers. */
8537 VG_(needs_core_errors) ();
8538 VG_(needs_tool_errors) (MC_(eq_Error),
8539 MC_(before_pp_Error),
8540 MC_(pp_Error),
8541 True,/*show TIDs for errors*/
8542 MC_(update_Error_extra),
8543 MC_(is_recognised_suppression),
8544 MC_(read_extra_suppression_info),
8545 MC_(error_matches_suppression),
8546 MC_(get_error_name),
8547 MC_(get_extra_suppression_info),
8548 MC_(print_extra_suppression_use),
8549 MC_(update_extra_suppression_use));
8550 VG_(needs_libc_freeres) ();
8551 VG_(needs_cxx_freeres) ();
8552 VG_(needs_command_line_options)(mc_process_cmd_line_options,
8553 mc_print_usage,
8554 mc_print_debug_usage);
8555 VG_(needs_client_requests) (mc_handle_client_request);
8556 VG_(needs_sanity_checks) (mc_cheap_sanity_check,
8557 mc_expensive_sanity_check);
8558 VG_(needs_print_stats) (mc_print_stats);
8559 VG_(needs_info_location) (MC_(pp_describe_addr));
/* Memcheck's replacements for the allocation and deallocation entry
   points; the final argument is the default redzone size. */
8560 VG_(needs_malloc_replacement) (MC_(malloc),
8561 MC_(__builtin_new),
8562 MC_(__builtin_new_aligned),
8563 MC_(__builtin_vec_new),
8564 MC_(__builtin_vec_new_aligned),
8565 MC_(memalign),
8566 MC_(calloc),
8567 MC_(free),
8568 MC_(__builtin_delete),
8569 MC_(__builtin_delete_aligned),
8570 MC_(__builtin_vec_delete),
8571 MC_(__builtin_vec_delete_aligned),
8572 MC_(realloc),
8573 MC_(malloc_usable_size),
8574 MC_MALLOC_DEFAULT_REDZONE_SZB );
/* Remember the client redzone size that the core reports as being in
   effect. */
8575 MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
8577 VG_(needs_xml_output) ();
8579 VG_(track_new_mem_startup) ( mc_new_mem_startup );
8581 // Handling of mmap and mprotect isn't simple (well, it is simple,
8582 // but the justification isn't.) See comments above, just prior to
8583 // mc_new_mem_mmap.
8584 VG_(track_new_mem_mmap) ( mc_new_mem_mmap );
8585 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
8587 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) );
/* Memory that goes away (signal stack, brk, munmap) is handled by
   MC_(make_mem_noaccess). */
8589 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
8590 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) );
8591 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) );
8593 /* Defer the specification of the new_mem_stack functions to the
8594 post_clo_init function, since we need to first parse the command
8595 line before deciding which set to use. */
8597 # ifdef PERF_FAST_STACK
8598 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
8599 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 );
8600 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 );
8601 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 );
8602 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 );
8603 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 );
8604 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 );
8605 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 );
8606 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 );
8607 # endif
8608 VG_(track_die_mem_stack) ( mc_die_mem_stack );
8610 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) );
8612 VG_(track_pre_mem_read) ( check_mem_is_defined );
8613 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
8614 VG_(track_pre_mem_write) ( check_mem_is_addressable );
8615 VG_(track_post_mem_write) ( mc_post_mem_write );
8617 VG_(track_post_reg_write) ( mc_post_reg_write );
8618 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
/* NOTE(review): MC_(clo_mc_level) is tested here although, per the
   comments in this function, the command line has not been processed
   yet; presumably this sees only the variable's initial value --
   confirm whether that is intended. */
8620 if (MC_(clo_mc_level) >= 2) {
8621 VG_(track_copy_mem_to_reg) ( mc_copy_mem_to_reg );
8622 VG_(track_copy_reg_to_mem) ( mc_copy_reg_to_mem );
8625 VG_(needs_watchpoint) ( mc_mark_unaddressable_for_watchpoint );
8627 init_shadow_memory();
8628 // MC_(chunk_poolalloc) must be allocated in post_clo_init
8629 tl_assert(MC_(chunk_poolalloc) == NULL);
8630 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" );
8631 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
8632 init_prof_mem();
/* Shadow memory has just been initialised above, so the expensive
   sanity check is required to pass at this point. */
8634 tl_assert( mc_expensive_sanity_check() );
8636 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
8637 tl_assert(sizeof(UWord) == sizeof(Addr));
8638 // Call me paranoid. I don't care.
8639 tl_assert(sizeof(void*) == sizeof(Addr));
8641 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
8642 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
8644 /* This is small. Always initialise it. */
8645 init_nia_to_ecu_cache();
8647 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
8648 if we need to, since the command line args haven't been
8649 processed yet. Hence defer it to mc_post_clo_init. */
8650 tl_assert(ocacheL1 == NULL);
8651 for (UInt i = 0; i < 4096; i++ ) {
8652 tl_assert(ocachesL2[i] == NULL);
8655 /* Check some important stuff. See extensive comments above
8656 re UNALIGNED_OR_HIGH for background. */
8657 # if VG_WORDSIZE == 4
8658 tl_assert(sizeof(void*) == 4);
8659 tl_assert(sizeof(Addr) == 4);
8660 tl_assert(sizeof(UWord) == 4);
8661 tl_assert(sizeof(Word) == 4);
8662 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
8663 tl_assert(MASK(1) == 0UL);
8664 tl_assert(MASK(2) == 1UL);
8665 tl_assert(MASK(4) == 3UL);
8666 tl_assert(MASK(8) == 7UL);
8667 # else
8668 tl_assert(VG_WORDSIZE == 8);
8669 tl_assert(sizeof(void*) == 8);
8670 tl_assert(sizeof(Addr) == 8);
8671 tl_assert(sizeof(UWord) == 8);
8672 tl_assert(sizeof(Word) == 8);
8673 tl_assert(MAX_PRIMARY_ADDRESS == 0x1FFFFFFFFFULL);
8674 tl_assert(MASK(1) == 0xFFFFFFE000000000ULL);
8675 tl_assert(MASK(2) == 0xFFFFFFE000000001ULL);
8676 tl_assert(MASK(4) == 0xFFFFFFE000000003ULL);
8677 tl_assert(MASK(8) == 0xFFFFFFE000000007ULL);
8678 # endif
8680 /* Check some assertions to do with the instrumentation machinery. */
8681 MC_(do_instrumentation_startup_checks)();
/* Requires UWord and SizeT to have the same size (presumably enforced
   at compile time, going by the macro's name -- confirm against its
   definition). */
8684 STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));
/* Passes mc_pre_clo_init to the core's interface-version macro.
   Presumably this is what identifies mc_pre_clo_init as the tool's
   pre-command-line initialisation entry point and records the tool
   interface version -- confirm against the macro's definition. */
8686 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
8688 /*--------------------------------------------------------------------*/
8689 /*--- end mc_main.c ---*/
8690 /*--------------------------------------------------------------------*/