Finalise bug-fix status notes.
[valgrind.git] / memcheck / mc_main.c
blob253f091dc312c187ece6787fc7107e491b3c787b
1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
5 /*--- accessibility (A) and validity (V) status of each byte. ---*/
6 /*--- mc_main.c ---*/
7 /*--------------------------------------------------------------------*/
9 /*
10 This file is part of MemCheck, a heavyweight Valgrind tool for
11 detecting memory errors.
13 Copyright (C) 2000-2017 Julian Seward
14 jseward@acm.org
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, write to the Free Software
28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 02111-1307, USA.
31 The GNU General Public License is contained in the file COPYING.
34 #include "pub_tool_basics.h"
35 #include "pub_tool_aspacemgr.h"
36 #include "pub_tool_gdbserver.h"
37 #include "pub_tool_poolalloc.h"
38 #include "pub_tool_hashtable.h" // For mc_include.h
39 #include "pub_tool_libcbase.h"
40 #include "pub_tool_libcassert.h"
41 #include "pub_tool_libcprint.h"
42 #include "pub_tool_machine.h"
43 #include "pub_tool_mallocfree.h"
44 #include "pub_tool_options.h"
45 #include "pub_tool_oset.h"
46 #include "pub_tool_rangemap.h"
47 #include "pub_tool_replacemalloc.h"
48 #include "pub_tool_tooliface.h"
49 #include "pub_tool_threadstate.h"
50 #include "pub_tool_xarray.h"
51 #include "pub_tool_xtree.h"
52 #include "pub_tool_xtmemory.h"
54 #include "mc_include.h"
55 #include "memcheck.h" /* for client requests */
57 /* Set to 1 to do a little more sanity checking */
58 #define VG_DEBUG_MEMORY 0
60 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
62 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
63 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
66 /*------------------------------------------------------------*/
67 /*--- Fast-case knobs ---*/
68 /*------------------------------------------------------------*/
70 // Comment these out to disable the fast cases (don't just set them to zero).
72 /* PERF_FAST_LOADV is in mc_include.h */
73 #define PERF_FAST_STOREV 1
75 #define PERF_FAST_SARP 1
77 #define PERF_FAST_STACK 1
78 #define PERF_FAST_STACK2 1
80 /* Change this to 1 to enable assertions on origin tracking cache fast
81 paths */
82 #define OC_ENABLE_ASSERTIONS 0
85 /*------------------------------------------------------------*/
86 /*--- Comments on the origin tracking implementation ---*/
87 /*------------------------------------------------------------*/
89 /* See detailed comment entitled
90 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
91 which is contained further on in this file. */
94 /*------------------------------------------------------------*/
95 /*--- V bits and A bits ---*/
96 /*------------------------------------------------------------*/
98 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
99 thinks the corresponding value bit is defined. And every memory byte
100 has an A bit, which tracks whether Memcheck thinks the program can access
101 it safely (ie. it's mapped, and has at least one of the RWX permission bits
102 set). So every N-bit register is shadowed with N V bits, and every memory
103 byte is shadowed with 8 V bits and one A bit.
105 In the implementation, we use two forms of compression (compressed V bits
106 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
107 for memory.
109 Memcheck also tracks extra information about each heap block that is
110 allocated, for detecting memory leaks and other purposes.
113 /*------------------------------------------------------------*/
114 /*--- Basic A/V bitmap representation. ---*/
115 /*------------------------------------------------------------*/
117 /* All reads and writes are checked against a memory map (a.k.a. shadow
118 memory), which records the state of all memory in the process.
120 On 32-bit machines the memory map is organised as follows.
121 The top 16 bits of an address are used to index into a top-level
122 map table, containing 65536 entries. Each entry is a pointer to a
123 second-level map, which records the accesibililty and validity
124 permissions for the 65536 bytes indexed by the lower 16 bits of the
125 address. Each byte is represented by two bits (details are below). So
126 each second-level map contains 16384 bytes. This two-level arrangement
127 conveniently divides the 4G address space into 64k lumps, each size 64k
128 bytes.
130 All entries in the primary (top-level) map must point to a valid
131 secondary (second-level) map. Since many of the 64kB chunks will
132 have the same status for every bit -- ie. noaccess (for unused
133 address space) or entirely addressable and defined (for code segments) --
134 there are three distinguished secondary maps, which indicate 'noaccess',
135 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
136 map entry points to the relevant distinguished map. In practice,
137 typically more than half of the addressable memory is represented with
138 the 'undefined' or 'defined' distinguished secondary map, so it gives a
139 good saving. It also lets us set the V+A bits of large address regions
140 quickly in set_address_range_perms().
142 On 64-bit machines it's more complicated. If we followed the same basic
143 scheme we'd have a four-level table which would require too many memory
144 accesses. So instead the top-level map table has 2^20 entries (indexed
145 using bits 16..35 of the address); this covers the bottom 64GB. Any
146 accesses above 64GB are handled with a slow, sparse auxiliary table.
147 Valgrind's address space manager tries very hard to keep things below
148 this 64GB barrier so that performance doesn't suffer too much.
150 Note that this file has a lot of different functions for reading and
151 writing shadow memory. Only a couple are strictly necessary (eg.
152 get_vabits2 and set_vabits2), most are just specialised for specific
153 common cases to improve performance.
155 Aside: the V+A bits are less precise than they could be -- we have no way
156 of marking memory as read-only. It would be great if we could add an
157 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
158 which requires 2.3 bits to hold, and there's no way to do that elegantly
159 -- we'd have to double up to 4 bits of metadata per byte, which doesn't
160 seem worth it.
163 /* --------------- Basic configuration --------------- */
165 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. */
167 #if VG_WORDSIZE == 4
169 /* cover the entire address space */
170 # define N_PRIMARY_BITS 16
172 #else
174 /* Just handle the first 128G fast and the rest via auxiliary
175 primaries. If you change this, Memcheck will assert at startup.
176 See the definition of UNALIGNED_OR_HIGH for extensive comments. */
177 # define N_PRIMARY_BITS 21
179 #endif
182 /* Do not change this. */
183 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS)
185 /* Do not change this. */
186 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
189 /* --------------- Secondary maps --------------- */
191 // Each byte of memory conceptually has an A bit, which indicates its
192 // addressability, and 8 V bits, which indicates its definedness.
194 // But because very few bytes are partially defined, we can use a nice
195 // compression scheme to reduce the size of shadow memory. Each byte of
196 // memory has 2 bits which indicates its state (ie. V+A bits):
198 // 00: noaccess (unaddressable but treated as fully defined)
199 // 01: undefined (addressable and fully undefined)
200 // 10: defined (addressable and fully defined)
201 // 11: partdefined (addressable and partially defined)
203 // In the "partdefined" case, we use a secondary table to store the V bits.
204 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
205 // bits.
207 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
208 // four bytes (32 bits) of memory are in each chunk. Hence the name
209 // "vabits8". This lets us get the V+A bits for four bytes at a time
210 // easily (without having to do any shifting and/or masking), and that is a
211 // very common operation. (Note that although each vabits8 chunk
212 // is 8 bits in size, it represents 32 bits of memory.)
214 // The representation is "inverse" little-endian... each 4 bytes of
215 // memory is represented by a 1 byte value, where:
217 // - the status of byte (a+0) is held in bits [1..0]
218 // - the status of byte (a+1) is held in bits [3..2]
219 // - the status of byte (a+2) is held in bits [5..4]
220 // - the status of byte (a+3) is held in bits [7..6]
222 // It's "inverse" because endianness normally describes a mapping from
223 // value bits to memory addresses; in this case the mapping is inverted.
224 // Ie. instead of particular value bits being held in certain addresses, in
225 // this case certain addresses are represented by particular value bits.
226 // See insert_vabits2_into_vabits8() for an example.
228 // But note that we don't compress the V bits stored in registers; they
229 // need to be explicit to made the shadow operations possible. Therefore
230 // when moving values between registers and memory we need to convert
231 // between the expanded in-register format and the compressed in-memory
232 // format. This isn't so difficult, it just requires careful attention in a
233 // few places.
235 // These represent eight bits of memory.
236 #define VA_BITS2_NOACCESS 0x0 // 00b
237 #define VA_BITS2_UNDEFINED 0x1 // 01b
238 #define VA_BITS2_DEFINED 0x2 // 10b
239 #define VA_BITS2_PARTDEFINED 0x3 // 11b
241 // These represent 16 bits of memory.
242 #define VA_BITS4_NOACCESS 0x0 // 00_00b
243 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
244 #define VA_BITS4_DEFINED 0xa // 10_10b
246 // These represent 32 bits of memory.
247 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
248 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
249 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
251 // These represent 64 bits of memory.
252 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
253 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
254 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
256 // These represent 128 bits of memory.
257 #define VA_BITS32_UNDEFINED 0x55555555 // 01_01_01_01b x 4
260 #define SM_CHUNKS 16384 // Each SM covers 64k of memory.
261 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
262 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3)
264 // Paranoia: it's critical for performance that the requested inlining
265 // occurs. So try extra hard.
266 #define INLINE inline __attribute__((always_inline))
268 static INLINE Addr start_of_this_sm ( Addr a ) {
269 return (a & (~SM_MASK));
271 static INLINE Bool is_start_of_sm ( Addr a ) {
272 return (start_of_this_sm(a) == a);
275 STATIC_ASSERT(SM_CHUNKS % 2 == 0);
277 typedef
278 union {
279 UChar vabits8[SM_CHUNKS];
280 UShort vabits16[SM_CHUNKS/2];
282 SecMap;
284 // 3 distinguished secondary maps, one for no-access, one for
285 // accessible but undefined, and one for accessible and defined.
286 // Distinguished secondaries may never be modified.
287 #define SM_DIST_NOACCESS 0
288 #define SM_DIST_UNDEFINED 1
289 #define SM_DIST_DEFINED 2
291 static SecMap sm_distinguished[3];
293 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
294 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
297 // Forward declaration
298 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
300 /* dist_sm points to one of our three distinguished secondaries. Make
301 a copy of it so that we can write to it.
303 static SecMap* copy_for_writing ( SecMap* dist_sm )
305 SecMap* new_sm;
306 tl_assert(dist_sm == &sm_distinguished[0]
307 || dist_sm == &sm_distinguished[1]
308 || dist_sm == &sm_distinguished[2]);
310 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
311 if (new_sm == NULL)
312 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
313 sizeof(SecMap) );
314 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
315 update_SM_counts(dist_sm, new_sm);
316 return new_sm;
319 /* --------------- Stats --------------- */
321 static Int n_issued_SMs = 0;
322 static Int n_deissued_SMs = 0;
323 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs
324 static Int n_undefined_SMs = 0;
325 static Int n_defined_SMs = 0;
326 static Int n_non_DSM_SMs = 0;
327 static Int max_noaccess_SMs = 0;
328 static Int max_undefined_SMs = 0;
329 static Int max_defined_SMs = 0;
330 static Int max_non_DSM_SMs = 0;
332 /* # searches initiated in auxmap_L1, and # base cmps required */
333 static ULong n_auxmap_L1_searches = 0;
334 static ULong n_auxmap_L1_cmps = 0;
335 /* # of searches that missed in auxmap_L1 and therefore had to
336 be handed to auxmap_L2. And the number of nodes inserted. */
337 static ULong n_auxmap_L2_searches = 0;
338 static ULong n_auxmap_L2_nodes = 0;
340 static Int n_sanity_cheap = 0;
341 static Int n_sanity_expensive = 0;
343 static Int n_secVBit_nodes = 0;
344 static Int max_secVBit_nodes = 0;
346 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
348 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
349 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
350 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --;
351 else { n_non_DSM_SMs --;
352 n_deissued_SMs ++; }
354 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
355 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
356 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++;
357 else { n_non_DSM_SMs ++;
358 n_issued_SMs ++; }
360 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs;
361 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
362 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs;
363 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs;
366 /* --------------- Primary maps --------------- */
368 /* The main primary map. This covers some initial part of the address
369 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
370 handled using the auxiliary primary map.
372 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
373 && (defined(VGP_arm_linux) \
374 || defined(VGP_x86_linux) || defined(VGP_x86_solaris))
375 /* mc_main_asm.c needs visibility on a few things declared in this file.
376 MC_MAIN_STATIC allows to define them static if ok, i.e. on
377 platforms that are not using hand-coded asm statements. */
378 #define MC_MAIN_STATIC
379 #else
380 #define MC_MAIN_STATIC static
381 #endif
382 MC_MAIN_STATIC SecMap* primary_map[N_PRIMARY_MAP];
385 /* An entry in the auxiliary primary map. base must be a 64k-aligned
386 value, and sm points at the relevant secondary map. As with the
387 main primary map, the secondary may be either a real secondary, or
388 one of the three distinguished secondaries. DO NOT CHANGE THIS
389 LAYOUT: the first word has to be the key for OSet fast lookups.
391 typedef
392 struct {
393 Addr base;
394 SecMap* sm;
396 AuxMapEnt;
398 /* Tunable parameter: How big is the L1 queue? */
399 #define N_AUXMAP_L1 24
401 /* Tunable parameter: How far along the L1 queue to insert
402 entries resulting from L2 lookups? */
403 #define AUXMAP_L1_INSERT_IX 12
405 static struct {
406 Addr base;
407 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
409 auxmap_L1[N_AUXMAP_L1];
411 static OSet* auxmap_L2 = NULL;
413 static void init_auxmap_L1_L2 ( void )
415 Int i;
416 for (i = 0; i < N_AUXMAP_L1; i++) {
417 auxmap_L1[i].base = 0;
418 auxmap_L1[i].ent = NULL;
421 tl_assert(0 == offsetof(AuxMapEnt,base));
422 tl_assert(sizeof(Addr) == sizeof(void*));
423 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
424 /*fastCmp*/ NULL,
425 VG_(malloc), "mc.iaLL.1", VG_(free) );
428 /* Check representation invariants; if OK return NULL; else a
429 descriptive bit of text. Also return the number of
430 non-distinguished secondary maps referred to from the auxiliary
431 primary maps. */
433 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
435 Word i, j;
436 /* On a 32-bit platform, the L2 and L1 tables should
437 both remain empty forever.
439 On a 64-bit platform:
440 In the L2 table:
441 all .base & 0xFFFF == 0
442 all .base > MAX_PRIMARY_ADDRESS
443 In the L1 table:
444 all .base & 0xFFFF == 0
445 all (.base > MAX_PRIMARY_ADDRESS
446 .base & 0xFFFF == 0
447 and .ent points to an AuxMapEnt with the same .base)
449 (.base == 0 and .ent == NULL)
451 *n_secmaps_found = 0;
452 if (sizeof(void*) == 4) {
453 /* 32-bit platform */
454 if (VG_(OSetGen_Size)(auxmap_L2) != 0)
455 return "32-bit: auxmap_L2 is non-empty";
456 for (i = 0; i < N_AUXMAP_L1; i++)
457 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
458 return "32-bit: auxmap_L1 is non-empty";
459 } else {
460 /* 64-bit platform */
461 UWord elems_seen = 0;
462 AuxMapEnt *elem, *res;
463 AuxMapEnt key;
464 /* L2 table */
465 VG_(OSetGen_ResetIter)(auxmap_L2);
466 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
467 elems_seen++;
468 if (0 != (elem->base & (Addr)0xFFFF))
469 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
470 if (elem->base <= MAX_PRIMARY_ADDRESS)
471 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
472 if (elem->sm == NULL)
473 return "64-bit: .sm in _L2 is NULL";
474 if (!is_distinguished_sm(elem->sm))
475 (*n_secmaps_found)++;
477 if (elems_seen != n_auxmap_L2_nodes)
478 return "64-bit: disagreement on number of elems in _L2";
479 /* Check L1-L2 correspondence */
480 for (i = 0; i < N_AUXMAP_L1; i++) {
481 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
482 continue;
483 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
484 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
485 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
486 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
487 if (auxmap_L1[i].ent == NULL)
488 return "64-bit: .ent is NULL in auxmap_L1";
489 if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
490 return "64-bit: _L1 and _L2 bases are inconsistent";
491 /* Look it up in auxmap_L2. */
492 key.base = auxmap_L1[i].base;
493 key.sm = 0;
494 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
495 if (res == NULL)
496 return "64-bit: _L1 .base not found in _L2";
497 if (res != auxmap_L1[i].ent)
498 return "64-bit: _L1 .ent disagrees with _L2 entry";
500 /* Check L1 contains no duplicates */
501 for (i = 0; i < N_AUXMAP_L1; i++) {
502 if (auxmap_L1[i].base == 0)
503 continue;
504 for (j = i+1; j < N_AUXMAP_L1; j++) {
505 if (auxmap_L1[j].base == 0)
506 continue;
507 if (auxmap_L1[j].base == auxmap_L1[i].base)
508 return "64-bit: duplicate _L1 .base entries";
512 return NULL; /* ok */
515 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
517 Word i;
518 tl_assert(ent);
519 tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
520 for (i = N_AUXMAP_L1-1; i > rank; i--)
521 auxmap_L1[i] = auxmap_L1[i-1];
522 auxmap_L1[rank].base = ent->base;
523 auxmap_L1[rank].ent = ent;
526 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
528 AuxMapEnt key;
529 AuxMapEnt* res;
530 Word i;
532 tl_assert(a > MAX_PRIMARY_ADDRESS);
533 a &= ~(Addr)0xFFFF;
535 /* First search the front-cache, which is a self-organising
536 list containing the most popular entries. */
538 if (LIKELY(auxmap_L1[0].base == a))
539 return auxmap_L1[0].ent;
540 if (LIKELY(auxmap_L1[1].base == a)) {
541 Addr t_base = auxmap_L1[0].base;
542 AuxMapEnt* t_ent = auxmap_L1[0].ent;
543 auxmap_L1[0].base = auxmap_L1[1].base;
544 auxmap_L1[0].ent = auxmap_L1[1].ent;
545 auxmap_L1[1].base = t_base;
546 auxmap_L1[1].ent = t_ent;
547 return auxmap_L1[0].ent;
550 n_auxmap_L1_searches++;
552 for (i = 0; i < N_AUXMAP_L1; i++) {
553 if (auxmap_L1[i].base == a) {
554 break;
557 tl_assert(i >= 0 && i <= N_AUXMAP_L1);
559 n_auxmap_L1_cmps += (ULong)(i+1);
561 if (i < N_AUXMAP_L1) {
562 if (i > 0) {
563 Addr t_base = auxmap_L1[i-1].base;
564 AuxMapEnt* t_ent = auxmap_L1[i-1].ent;
565 auxmap_L1[i-1].base = auxmap_L1[i-0].base;
566 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent;
567 auxmap_L1[i-0].base = t_base;
568 auxmap_L1[i-0].ent = t_ent;
569 i--;
571 return auxmap_L1[i].ent;
574 n_auxmap_L2_searches++;
576 /* First see if we already have it. */
577 key.base = a;
578 key.sm = 0;
580 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
581 if (res)
582 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
583 return res;
586 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
588 AuxMapEnt *nyu, *res;
590 /* First see if we already have it. */
591 res = maybe_find_in_auxmap( a );
592 if (LIKELY(res))
593 return res;
595 /* Ok, there's no entry in the secondary map, so we'll have
596 to allocate one. */
597 a &= ~(Addr)0xFFFF;
599 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
600 nyu->base = a;
601 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS];
602 VG_(OSetGen_Insert)( auxmap_L2, nyu );
603 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
604 n_auxmap_L2_nodes++;
605 return nyu;
608 /* --------------- SecMap fundamentals --------------- */
610 // In all these, 'low' means it's definitely in the main primary map,
611 // 'high' means it's definitely in the auxiliary table.
613 static INLINE UWord get_primary_map_low_offset ( Addr a )
615 UWord pm_off = a >> 16;
616 return pm_off;
619 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
621 UWord pm_off = a >> 16;
622 # if VG_DEBUG_MEMORY >= 1
623 tl_assert(pm_off < N_PRIMARY_MAP);
624 # endif
625 return &primary_map[ pm_off ];
628 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
630 AuxMapEnt* am = find_or_alloc_in_auxmap(a);
631 return &am->sm;
634 static INLINE SecMap** get_secmap_ptr ( Addr a )
636 return ( a <= MAX_PRIMARY_ADDRESS
637 ? get_secmap_low_ptr(a)
638 : get_secmap_high_ptr(a));
641 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
643 return *get_secmap_low_ptr(a);
646 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
648 return *get_secmap_high_ptr(a);
651 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
653 SecMap** p = get_secmap_low_ptr(a);
654 if (UNLIKELY(is_distinguished_sm(*p)))
655 *p = copy_for_writing(*p);
656 return *p;
659 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
661 SecMap** p = get_secmap_high_ptr(a);
662 if (UNLIKELY(is_distinguished_sm(*p)))
663 *p = copy_for_writing(*p);
664 return *p;
667 /* Produce the secmap for 'a', either from the primary map or by
668 ensuring there is an entry for it in the aux primary map. The
669 secmap may be a distinguished one as the caller will only want to
670 be able to read it.
672 static INLINE SecMap* get_secmap_for_reading ( Addr a )
674 return ( a <= MAX_PRIMARY_ADDRESS
675 ? get_secmap_for_reading_low (a)
676 : get_secmap_for_reading_high(a) );
679 /* Produce the secmap for 'a', either from the primary map or by
680 ensuring there is an entry for it in the aux primary map. The
681 secmap may not be a distinguished one, since the caller will want
682 to be able to write it. If it is a distinguished secondary, make a
683 writable copy of it, install it, and return the copy instead. (COW
684 semantics).
686 static INLINE SecMap* get_secmap_for_writing ( Addr a )
688 return ( a <= MAX_PRIMARY_ADDRESS
689 ? get_secmap_for_writing_low (a)
690 : get_secmap_for_writing_high(a) );
693 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
694 allocate one if one doesn't already exist. This is used by the
695 leak checker.
697 static SecMap* maybe_get_secmap_for ( Addr a )
699 if (a <= MAX_PRIMARY_ADDRESS) {
700 return get_secmap_for_reading_low(a);
701 } else {
702 AuxMapEnt* am = maybe_find_in_auxmap(a);
703 return am ? am->sm : NULL;
707 /* --------------- Fundamental functions --------------- */
709 static INLINE
710 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
712 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
713 *vabits8 &= ~(0x3 << shift); // mask out the two old bits
714 *vabits8 |= (vabits2 << shift); // mask in the two new bits
717 static INLINE
718 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
720 UInt shift;
721 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
722 shift = (a & 2) << 1; // shift by 0 or 4
723 *vabits8 &= ~(0xf << shift); // mask out the four old bits
724 *vabits8 |= (vabits4 << shift); // mask in the four new bits
727 static INLINE
728 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
730 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
731 vabits8 >>= shift; // shift the two bits to the bottom
732 return 0x3 & vabits8; // mask out the rest
735 static INLINE
736 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
738 UInt shift;
739 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
740 shift = (a & 2) << 1; // shift by 0 or 4
741 vabits8 >>= shift; // shift the four bits to the bottom
742 return 0xf & vabits8; // mask out the rest
745 // Note that these four are only used in slow cases. The fast cases do
746 // clever things like combine the auxmap check (in
747 // get_secmap_{read,writ}able) with alignment checks.
749 // *** WARNING! ***
750 // Any time this function is called, if it is possible that vabits2
751 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
752 // sec-V-bits table must also be set!
753 static INLINE
754 void set_vabits2 ( Addr a, UChar vabits2 )
756 SecMap* sm = get_secmap_for_writing(a);
757 UWord sm_off = SM_OFF(a);
758 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
761 static INLINE
762 UChar get_vabits2 ( Addr a )
764 SecMap* sm = get_secmap_for_reading(a);
765 UWord sm_off = SM_OFF(a);
766 UChar vabits8 = sm->vabits8[sm_off];
767 return extract_vabits2_from_vabits8(a, vabits8);
770 // *** WARNING! ***
771 // Any time this function is called, if it is possible that any of the
772 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
773 // corresponding entry(s) in the sec-V-bits table must also be set!
774 static INLINE
775 UChar get_vabits8_for_aligned_word32 ( Addr a )
777 SecMap* sm = get_secmap_for_reading(a);
778 UWord sm_off = SM_OFF(a);
779 UChar vabits8 = sm->vabits8[sm_off];
780 return vabits8;
783 static INLINE
784 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
786 SecMap* sm = get_secmap_for_writing(a);
787 UWord sm_off = SM_OFF(a);
788 sm->vabits8[sm_off] = vabits8;
792 // Forward declarations
793 static UWord get_sec_vbits8(Addr a);
794 static void set_sec_vbits8(Addr a, UWord vbits8);
796 // Returns False if there was an addressability error.
797 static INLINE
798 Bool set_vbits8 ( Addr a, UChar vbits8 )
800 Bool ok = True;
801 UChar vabits2 = get_vabits2(a);
802 if ( VA_BITS2_NOACCESS != vabits2 ) {
803 // Addressable. Convert in-register format to in-memory format.
804 // Also remove any existing sec V bit entry for the byte if no
805 // longer necessary.
806 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; }
807 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
808 else { vabits2 = VA_BITS2_PARTDEFINED;
809 set_sec_vbits8(a, vbits8); }
810 set_vabits2(a, vabits2);
812 } else {
813 // Unaddressable! Do nothing -- when writing to unaddressable
814 // memory it acts as a black hole, and the V bits can never be seen
815 // again. So we don't have to write them at all.
816 ok = False;
818 return ok;
821 // Returns False if there was an addressability error. In that case, we put
822 // all defined bits into vbits8.
823 static INLINE
824 Bool get_vbits8 ( Addr a, UChar* vbits8 )
826 Bool ok = True;
827 UChar vabits2 = get_vabits2(a);
829 // Convert the in-memory format to in-register format.
830 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; }
831 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
832 else if ( VA_BITS2_NOACCESS == vabits2 ) {
833 *vbits8 = V_BITS8_DEFINED; // Make V bits defined!
834 ok = False;
835 } else {
836 tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
837 *vbits8 = get_sec_vbits8(a);
839 return ok;
843 /* --------------- Secondary V bit table ------------ */
845 // This table holds the full V bit pattern for partially-defined bytes
846 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
847 // memory.
849 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
850 // then overwrite the same address with a fully defined byte, the sec-V-bit
851 // node will not necessarily be removed. This is because checking for
852 // whether removal is necessary would slow down the fast paths.
854 // To avoid the stale nodes building up too much, we periodically (once the
855 // table reaches a certain size) garbage collect (GC) the table by
856 // traversing it and evicting any nodes not having PDB.
857 // If more than a certain proportion of nodes survived, we increase the
858 // table size so that GCs occur less often.
860 // This policy is designed to avoid bad table bloat in the worst case where
861 // a program creates huge numbers of stale PDBs -- we would get this bloat
862 // if we had no GC -- while handling well the case where a node becomes
863 // stale but shortly afterwards is rewritten with a PDB and so becomes
864 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
865 // remove all stale nodes as soon as possible, we just end up re-adding a
866 // lot of them in later again. The "sufficiently stale" approach avoids
867 // this. (If a program has many live PDBs, performance will just suck,
868 // there's no way around that.)
870 // Further comments, JRS 14 Feb 2012. It turns out that the policy of
871 // holding on to stale entries for 2 GCs before discarding them can lead
872 // to massive space leaks. So we're changing to an arrangement where
873 // lines are evicted as soon as they are observed to be stale during a
874 // GC. This also has a side benefit of allowing the sufficiently_stale
875 // field to be removed from the SecVBitNode struct, reducing its size by
876 // 8 bytes, which is a substantial space saving considering that the
877 // struct was previously 32 or so bytes, on a 64 bit target.
879 // In order to try and mitigate the problem that the "sufficiently stale"
880 // heuristic was designed to avoid, the table size is allowed to drift
881 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This
882 // means that nodes will exist in the table longer on average, and hopefully
883 // will be deleted and re-added less frequently.
885 // The previous scaling up mechanism (now called STEPUP) is retained:
886 // if residency exceeds 50%, the table is scaled up, although by a
887 // factor sqrt(2) rather than 2 as before. This effectively doubles the
888 // frequency of GCs when there are many PDBs at reduces the tendency of
889 // stale PDBs to reside for long periods in the table.
891 static OSet* secVBitTable;
893 // Stats
894 static ULong sec_vbits_new_nodes = 0;
895 static ULong sec_vbits_updates = 0;
897 // This must be a power of two; this is checked in mc_pre_clo_init().
898 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
899 // a larger address range) they take more space but we can get multiple
900 // partially-defined bytes in one if they are close to each other, reducing
901 // the number of total nodes. In practice sometimes they are clustered (eg.
902 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
903 // row), but often not. So we choose something intermediate.
904 #define BYTES_PER_SEC_VBIT_NODE 16
906 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
907 // more than this many nodes survive a GC.
908 #define STEPUP_SURVIVOR_PROPORTION 0.5
909 #define STEPUP_GROWTH_FACTOR 1.414213562
911 // If the above heuristic doesn't apply, then we may make the table
912 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
913 // this many nodes survive a GC, _and_ the total table size does
914 // not exceed a fixed limit. The numbers are somewhat arbitrary, but
915 // work tolerably well on long Firefox runs. The scaleup ratio of 1.5%
916 // effectively although gradually reduces residency and increases time
917 // between GCs for programs with small numbers of PDBs. The 80000 limit
918 // effectively limits the table size to around 2MB for programs with
919 // small numbers of PDBs, whilst giving a reasonably long lifetime to
920 // entries, to try and reduce the costs resulting from deleting and
921 // re-adding of entries.
922 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
923 #define DRIFTUP_GROWTH_FACTOR 1.015
924 #define DRIFTUP_MAX_SIZE 80000
926 // We GC the table when it gets this many nodes in it, ie. it's effectively
927 // the table size. It can change.
928 static Int secVBitLimit = 1000;
930 // The number of GCs done, used to age sec-V-bit nodes for eviction.
931 // Because it's unsigned, wrapping doesn't matter -- the right answer will
932 // come out anyway.
933 static UInt GCs_done = 0;
935 typedef
936 struct {
937 Addr a;
938 UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
940 SecVBitNode;
942 static OSet* createSecVBitTable(void)
944 OSet* newSecVBitTable;
945 newSecVBitTable = VG_(OSetGen_Create_With_Pool)
946 ( offsetof(SecVBitNode, a),
947 NULL, // use fast comparisons
948 VG_(malloc), "mc.cSVT.1 (sec VBit table)",
949 VG_(free),
950 1000,
951 sizeof(SecVBitNode));
952 return newSecVBitTable;
955 static void gcSecVBitTable(void)
957 OSet* secVBitTable2;
958 SecVBitNode* n;
959 Int i, n_nodes = 0, n_survivors = 0;
961 GCs_done++;
963 // Create the new table.
964 secVBitTable2 = createSecVBitTable();
966 // Traverse the table, moving fresh nodes into the new table.
967 VG_(OSetGen_ResetIter)(secVBitTable);
968 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
969 // Keep node if any of its bytes are non-stale. Using
970 // get_vabits2() for the lookup is not very efficient, but I don't
971 // think it matters.
972 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
973 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
974 // Found a non-stale byte, so keep =>
975 // Insert a copy of the node into the new table.
976 SecVBitNode* n2 =
977 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
978 *n2 = *n;
979 VG_(OSetGen_Insert)(secVBitTable2, n2);
980 break;
985 // Get the before and after sizes.
986 n_nodes = VG_(OSetGen_Size)(secVBitTable);
987 n_survivors = VG_(OSetGen_Size)(secVBitTable2);
989 // Destroy the old table, and put the new one in its place.
990 VG_(OSetGen_Destroy)(secVBitTable);
991 secVBitTable = secVBitTable2;
993 if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
994 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
995 n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
998 // Increase table size if necessary.
999 if ((Double)n_survivors
1000 > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
1001 secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
1002 if (VG_(clo_verbosity) > 1)
1003 VG_(message)(Vg_DebugMsg,
1004 "memcheck GC: %d new table size (stepup)\n",
1005 secVBitLimit);
1007 else
1008 if (secVBitLimit < DRIFTUP_MAX_SIZE
1009 && (Double)n_survivors
1010 > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
1011 secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
1012 if (VG_(clo_verbosity) > 1)
1013 VG_(message)(Vg_DebugMsg,
1014 "memcheck GC: %d new table size (driftup)\n",
1015 secVBitLimit);
1019 static UWord get_sec_vbits8(Addr a)
1021 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1022 Int amod = a % BYTES_PER_SEC_VBIT_NODE;
1023 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1024 UChar vbits8;
1025 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1026 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1027 // make it to the secondary V bits table.
1028 vbits8 = n->vbits8[amod];
1029 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1030 return vbits8;
1033 static void set_sec_vbits8(Addr a, UWord vbits8)
1035 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1036 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE;
1037 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1038 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1039 // make it to the secondary V bits table.
1040 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1041 if (n) {
1042 n->vbits8[amod] = vbits8; // update
1043 sec_vbits_updates++;
1044 } else {
1045 // Do a table GC if necessary. Nb: do this before creating and
1046 // inserting the new node, to avoid erroneously GC'ing the new node.
1047 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1048 gcSecVBitTable();
1051 // New node: assign the specific byte, make the rest invalid (they
1052 // should never be read as-is, but be cautious).
1053 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1054 n->a = aAligned;
1055 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1056 n->vbits8[i] = V_BITS8_UNDEFINED;
1058 n->vbits8[amod] = vbits8;
1060 // Insert the new node.
1061 VG_(OSetGen_Insert)(secVBitTable, n);
1062 sec_vbits_new_nodes++;
1064 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1065 if (n_secVBit_nodes > max_secVBit_nodes)
1066 max_secVBit_nodes = n_secVBit_nodes;
1070 /* --------------- Endianness helpers --------------- */
1072 /* Returns the offset in memory of the byteno-th most significant byte
1073 in a wordszB-sized word, given the specified endianness. */
1074 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1075 UWord byteno ) {
1076 return bigendian ? (wordszB-1-byteno) : byteno;
1080 /* --------------- Ignored address ranges --------------- */
1082 /* Denotes the address-error-reportability status for address ranges:
1083 IAR_NotIgnored: the usual case -- report errors in this range
1084 IAR_CommandLine: don't report errors -- from command line setting
1085 IAR_ClientReq: don't report errors -- from client request
1087 typedef
1088 enum { IAR_INVALID=99,
1089 IAR_NotIgnored,
1090 IAR_CommandLine,
1091 IAR_ClientReq }
1092 IARKind;
1094 static const HChar* showIARKind ( IARKind iark )
1096 switch (iark) {
1097 case IAR_INVALID: return "INVALID";
1098 case IAR_NotIgnored: return "NotIgnored";
1099 case IAR_CommandLine: return "CommandLine";
1100 case IAR_ClientReq: return "ClientReq";
1101 default: return "???";
1105 // RangeMap<IARKind>
1106 static RangeMap* gIgnoredAddressRanges = NULL;
1108 static void init_gIgnoredAddressRanges ( void )
1110 if (LIKELY(gIgnoredAddressRanges != NULL))
1111 return;
1112 gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
1113 VG_(free), IAR_NotIgnored );
1116 Bool MC_(in_ignored_range) ( Addr a )
1118 if (LIKELY(gIgnoredAddressRanges == NULL))
1119 return False;
1120 UWord how = IAR_INVALID;
1121 UWord key_min = ~(UWord)0;
1122 UWord key_max = (UWord)0;
1123 VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
1124 tl_assert(key_min <= a && a <= key_max);
1125 switch (how) {
1126 case IAR_NotIgnored: return False;
1127 case IAR_CommandLine: return True;
1128 case IAR_ClientReq: return True;
1129 default: break; /* invalid */
1131 VG_(tool_panic)("MC_(in_ignore_range)");
1132 /*NOTREACHED*/
1135 Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB )
1137 if (LIKELY(!MC_(clo_ignore_range_below_sp)))
1138 return False;
1139 tl_assert(szB >= 1 && szB <= 32);
1140 tl_assert(MC_(clo_ignore_range_below_sp__first_offset)
1141 > MC_(clo_ignore_range_below_sp__last_offset));
1142 Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset);
1143 Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset);
1144 if (range_lo >= range_hi) {
1145 /* Bizarre. We have a wraparound situation. What should we do? */
1146 return False; // Play safe
1147 } else {
1148 /* This is the expected case. */
1149 if (range_lo <= a && a + szB - 1 <= range_hi)
1150 return True;
1151 else
1152 return False;
1154 /*NOTREACHED*/
1155 tl_assert(0);
1158 /* Parse two Addrs (in hex) separated by a dash, or fail. */
1160 static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 )
1162 Bool ok = VG_(parse_Addr) (ppc, result1);
1163 if (!ok)
1164 return False;
1165 if (**ppc != '-')
1166 return False;
1167 (*ppc)++;
1168 ok = VG_(parse_Addr) (ppc, result2);
1169 if (!ok)
1170 return False;
1171 return True;
1174 /* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
1175 or fail. */
1177 static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 )
1179 Bool ok = VG_(parse_UInt) (ppc, result1);
1180 if (!ok)
1181 return False;
1182 if (**ppc != '-')
1183 return False;
1184 (*ppc)++;
1185 ok = VG_(parse_UInt) (ppc, result2);
1186 if (!ok)
1187 return False;
1188 return True;
1191 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1192 fail. If they are valid, add them to the global set of ignored
1193 ranges. */
1194 static Bool parse_ignore_ranges ( const HChar* str0 )
1196 init_gIgnoredAddressRanges();
1197 const HChar* str = str0;
1198 const HChar** ppc = &str;
1199 while (1) {
1200 Addr start = ~(Addr)0;
1201 Addr end = (Addr)0;
1202 Bool ok = parse_Addr_pair(ppc, &start, &end);
1203 if (!ok)
1204 return False;
1205 if (start > end)
1206 return False;
1207 VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
1208 if (**ppc == 0)
1209 return True;
1210 if (**ppc != ',')
1211 return False;
1212 (*ppc)++;
1214 /*NOTREACHED*/
1215 return False;
1218 /* Add or remove [start, +len) from the set of ignored ranges. */
1219 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
1221 init_gIgnoredAddressRanges();
1222 const Bool verbose = (VG_(clo_verbosity) > 1);
1223 if (len == 0) {
1224 return False;
1226 if (addRange) {
1227 VG_(bindRangeMap)(gIgnoredAddressRanges,
1228 start, start+len-1, IAR_ClientReq);
1229 if (verbose)
1230 VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
1231 (void*)start, (void*)(start+len-1));
1232 } else {
1233 VG_(bindRangeMap)(gIgnoredAddressRanges,
1234 start, start+len-1, IAR_NotIgnored);
1235 if (verbose)
1236 VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
1237 (void*)start, (void*)(start+len-1));
1239 if (verbose) {
1240 VG_(dmsg)("memcheck: now have %u ranges:\n",
1241 VG_(sizeRangeMap)(gIgnoredAddressRanges));
1242 UInt i;
1243 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
1244 UWord val = IAR_INVALID;
1245 UWord key_min = ~(UWord)0;
1246 UWord key_max = (UWord)0;
1247 VG_(indexRangeMap)( &key_min, &key_max, &val,
1248 gIgnoredAddressRanges, i );
1249 VG_(dmsg)("memcheck: [%u] %016lx-%016lx %s\n",
1250 i, key_min, key_max, showIARKind(val));
1253 return True;
1257 /* --------------- Load/store slow cases. --------------- */
1259 static
1260 __attribute__((noinline))
1261 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
1262 Addr a, SizeT nBits, Bool bigendian )
1264 ULong pessim[4]; /* only used when p-l-ok=yes */
1265 SSizeT szB = nBits / 8;
1266 SSizeT szL = szB / 8; /* Size in Longs (64-bit units) */
1267 SSizeT i, j; /* Must be signed. */
1268 SizeT n_addrs_bad = 0;
1269 Addr ai;
1270 UChar vbits8;
1271 Bool ok;
1273 /* Code below assumes load size is a power of two and at least 64
1274 bits. */
1275 tl_assert((szB & (szB-1)) == 0 && szL > 0);
1277 /* If this triggers, you probably just need to increase the size of
1278 the pessim array. */
1279 tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
1281 for (j = 0; j < szL; j++) {
1282 pessim[j] = V_BITS64_DEFINED;
1283 res[j] = V_BITS64_UNDEFINED;
1286 /* Make up a result V word, which contains the loaded data for
1287 valid addresses and Defined for invalid addresses. Iterate over
1288 the bytes in the word, from the most significant down to the
1289 least. The vbits to return are calculated into vbits128. Also
1290 compute the pessimising value to be used when
1291 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1292 info can be gleaned from the pessim array) but is used as a
1293 cross-check. */
1294 for (j = szL-1; j >= 0; j--) {
1295 ULong vbits64 = V_BITS64_UNDEFINED;
1296 ULong pessim64 = V_BITS64_DEFINED;
1297 UWord long_index = byte_offset_w(szL, bigendian, j);
1298 for (i = 8-1; i >= 0; i--) {
1299 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
1300 ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
1301 ok = get_vbits8(ai, &vbits8);
1302 vbits64 <<= 8;
1303 vbits64 |= vbits8;
1304 if (!ok) n_addrs_bad++;
1305 pessim64 <<= 8;
1306 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1308 res[long_index] = vbits64;
1309 pessim[long_index] = pessim64;
1312 /* In the common case, all the addresses involved are valid, so we
1313 just return the computed V bits and have done. */
1314 if (LIKELY(n_addrs_bad == 0))
1315 return;
1317 /* If there's no possibility of getting a partial-loads-ok
1318 exemption, report the error and quit. */
1319 if (!MC_(clo_partial_loads_ok)) {
1320 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1321 return;
1324 /* The partial-loads-ok excemption might apply. Find out if it
1325 does. If so, don't report an addressing error, but do return
1326 Undefined for the bytes that are out of range, so as to avoid
1327 false negatives. If it doesn't apply, just report an addressing
1328 error in the usual way. */
1330 /* Some code steps along byte strings in aligned chunks
1331 even when there is only a partially defined word at the end (eg,
1332 optimised strlen). This is allowed by the memory model of
1333 modern machines, since an aligned load cannot span two pages and
1334 thus cannot "partially fault".
1336 Therefore, a load from a partially-addressible place is allowed
1337 if all of the following hold:
1338 - the command-line flag is set [by default, it isn't]
1339 - it's an aligned load
1340 - at least one of the addresses in the word *is* valid
1342 Since this suppresses the addressing error, we avoid false
1343 negatives by marking bytes undefined when they come from an
1344 invalid address.
1347 /* "at least one of the addresses is invalid" */
1348 ok = False;
1349 for (j = 0; j < szL; j++)
1350 ok |= pessim[j] != V_BITS64_DEFINED;
1351 tl_assert(ok);
1353 # if defined(VGP_s390x_linux)
1354 tl_assert(szB == 16); // s390 doesn't have > 128 bit SIMD
1355 /* OK if all loaded bytes are from the same page. */
1356 Bool alignedOK = ((a & 0xfff) <= 0x1000 - szB);
1357 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
1358 /* lxvd2x might generate an unaligned 128 bit vector load. */
1359 Bool alignedOK = (szB == 16);
1360 # else
1361 /* OK if the address is aligned by the load size. */
1362 Bool alignedOK = (0 == (a & (szB - 1)));
1363 # endif
1365 if (alignedOK && n_addrs_bad < szB) {
1366 /* Exemption applies. Use the previously computed pessimising
1367 value and return the combined result, but don't flag an
1368 addressing error. The pessimising value is Defined for valid
1369 addresses and Undefined for invalid addresses. */
1370 /* for assumption that doing bitwise or implements UifU */
1371 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1372 /* (really need "UifU" here...)
1373 vbits[j] UifU= pessim[j] (is pessimised by it, iow) */
1374 for (j = szL-1; j >= 0; j--)
1375 res[j] |= pessim[j];
1376 return;
1379 /* Exemption doesn't apply. Flag an addressing error in the normal
1380 way. */
1381 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1384 MC_MAIN_STATIC
1385 __attribute__((noinline))
1386 __attribute__((used))
1387 VG_REGPARM(3)
1388 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian );
1390 MC_MAIN_STATIC
1391 __attribute__((noinline))
1392 __attribute__((used))
1393 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
1394 this function may get called from hand written assembly. */
1395 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1397 PROF_EVENT(MCPE_LOADVN_SLOW);
1399 /* ------------ BEGIN semi-fast cases ------------ */
1400 /* These deal quickly-ish with the common auxiliary primary map
1401 cases on 64-bit platforms. Are merely a speedup hack; can be
1402 omitted without loss of correctness/functionality. Note that in
1403 both cases the "sizeof(void*) == 8" causes these cases to be
1404 folded out by compilers on 32-bit platforms. These are derived
1405 from LOADV64 and LOADV32.
1408 # if defined(VGA_mips64) && defined(VGABI_N32)
1409 if (LIKELY(sizeof(void*) == 4 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1410 # else
1411 if (LIKELY(sizeof(void*) == 8 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1412 # endif
1414 SecMap* sm = get_secmap_for_reading(a);
1415 UWord sm_off16 = SM_OFF_16(a);
1416 UWord vabits16 = sm->vabits16[sm_off16];
1417 if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1418 return V_BITS64_DEFINED;
1419 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1420 return V_BITS64_UNDEFINED;
1421 /* else fall into the slow case */
1424 # if defined(VGA_mips64) && defined(VGABI_N32)
1425 if (LIKELY(sizeof(void*) == 4 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1426 # else
1427 if (LIKELY(sizeof(void*) == 8 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1428 # endif
1430 SecMap* sm = get_secmap_for_reading(a);
1431 UWord sm_off = SM_OFF(a);
1432 UWord vabits8 = sm->vabits8[sm_off];
1433 if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1434 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1435 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1436 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1437 /* else fall into slow case */
1440 /* ------------ END semi-fast cases ------------ */
1442 ULong vbits64 = V_BITS64_UNDEFINED; /* result */
1443 ULong pessim64 = V_BITS64_DEFINED; /* only used when p-l-ok=yes */
1444 SSizeT szB = nBits / 8;
1445 SSizeT i; /* Must be signed. */
1446 SizeT n_addrs_bad = 0;
1447 Addr ai;
1448 UChar vbits8;
1449 Bool ok;
1451 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1453 /* Make up a 64-bit result V word, which contains the loaded data
1454 for valid addresses and Defined for invalid addresses. Iterate
1455 over the bytes in the word, from the most significant down to
1456 the least. The vbits to return are calculated into vbits64.
1457 Also compute the pessimising value to be used when
1458 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1459 info can be gleaned from pessim64) but is used as a
1460 cross-check. */
1461 for (i = szB-1; i >= 0; i--) {
1462 PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
1463 ai = a + byte_offset_w(szB, bigendian, i);
1464 ok = get_vbits8(ai, &vbits8);
1465 vbits64 <<= 8;
1466 vbits64 |= vbits8;
1467 if (!ok) n_addrs_bad++;
1468 pessim64 <<= 8;
1469 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1472 /* In the common case, all the addresses involved are valid, so we
1473 just return the computed V bits and have done. */
1474 if (LIKELY(n_addrs_bad == 0))
1475 return vbits64;
1477 /* If there's no possibility of getting a partial-loads-ok
1478 exemption, report the error and quit. */
1479 if (!MC_(clo_partial_loads_ok)) {
1480 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1481 return vbits64;
1484 /* The partial-loads-ok excemption might apply. Find out if it
1485 does. If so, don't report an addressing error, but do return
1486 Undefined for the bytes that are out of range, so as to avoid
1487 false negatives. If it doesn't apply, just report an addressing
1488 error in the usual way. */
1490 /* Some code steps along byte strings in aligned word-sized chunks
1491 even when there is only a partially defined word at the end (eg,
1492 optimised strlen). This is allowed by the memory model of
1493 modern machines, since an aligned load cannot span two pages and
1494 thus cannot "partially fault". Despite such behaviour being
1495 declared undefined by ANSI C/C++.
1497 Therefore, a load from a partially-addressible place is allowed
1498 if all of the following hold:
1499 - the command-line flag is set [by default, it isn't]
1500 - it's a word-sized, word-aligned load
1501 - at least one of the addresses in the word *is* valid
1503 Since this suppresses the addressing error, we avoid false
1504 negatives by marking bytes undefined when they come from an
1505 invalid address.
1508 /* "at least one of the addresses is invalid" */
1509 tl_assert(pessim64 != V_BITS64_DEFINED);
1511 # if defined(VGA_mips64) && defined(VGABI_N32)
1512 if (szB == VG_WORDSIZE * 2 && VG_IS_WORD_ALIGNED(a)
1513 && n_addrs_bad < VG_WORDSIZE * 2)
1514 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
1515 /* On power unaligned loads of words are OK. */
1516 if (szB == VG_WORDSIZE && n_addrs_bad < VG_WORDSIZE)
1517 # else
1518 if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1519 && n_addrs_bad < VG_WORDSIZE)
1520 # endif
1522 /* Exemption applies. Use the previously computed pessimising
1523 value for vbits64 and return the combined result, but don't
1524 flag an addressing error. The pessimising value is Defined
1525 for valid addresses and Undefined for invalid addresses. */
1526 /* for assumption that doing bitwise or implements UifU */
1527 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1528 /* (really need "UifU" here...)
1529 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1530 vbits64 |= pessim64;
1531 return vbits64;
1534 /* Also, in appears that gcc generates string-stepping code in
1535 32-bit chunks on 64 bit platforms. So, also grant an exception
1536 for this case. Note that the first clause of the conditional
1537 (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1538 will get folded out in 32 bit builds. */
1539 # if defined(VGA_mips64) && defined(VGABI_N32)
1540 if (VG_WORDSIZE == 4
1541 && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4)
1542 # else
1543 if (VG_WORDSIZE == 8
1544 && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4)
1545 # endif
1547 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1548 /* (really need "UifU" here...)
1549 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1550 vbits64 |= pessim64;
1551 /* Mark the upper 32 bits as undefined, just to be on the safe
1552 side. */
1553 vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1554 return vbits64;
1557 /* Exemption doesn't apply. Flag an addressing error in the normal
1558 way. */
1559 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1561 return vbits64;
1565 static
1566 __attribute__((noinline))
1567 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1569 SizeT szB = nBits / 8;
1570 SizeT i, n_addrs_bad = 0;
1571 UChar vbits8;
1572 Addr ai;
1573 Bool ok;
1575 PROF_EVENT(MCPE_STOREVN_SLOW);
1577 /* ------------ BEGIN semi-fast cases ------------ */
1578 /* These deal quickly-ish with the common auxiliary primary map
1579 cases on 64-bit platforms. Are merely a speedup hack; can be
1580 omitted without loss of correctness/functionality. Note that in
1581 both cases the "sizeof(void*) == 8" causes these cases to be
1582 folded out by compilers on 32-bit platforms. The logic below
1583 is somewhat similar to some cases extensively commented in
1584 MC_(helperc_STOREV8).
1586 # if defined(VGA_mips64) && defined(VGABI_N32)
1587 if (LIKELY(sizeof(void*) == 4 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1588 # else
1589 if (LIKELY(sizeof(void*) == 8 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1590 # endif
1592 SecMap* sm = get_secmap_for_reading(a);
1593 UWord sm_off16 = SM_OFF_16(a);
1594 UWord vabits16 = sm->vabits16[sm_off16];
1595 if (LIKELY( !is_distinguished_sm(sm) &&
1596 (VA_BITS16_DEFINED == vabits16 ||
1597 VA_BITS16_UNDEFINED == vabits16) )) {
1598 /* Handle common case quickly: a is suitably aligned, */
1599 /* is mapped, and is addressible. */
1600 // Convert full V-bits in register to compact 2-bit form.
1601 if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1602 sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
1603 return;
1604 } else if (V_BITS64_UNDEFINED == vbytes) {
1605 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
1606 return;
1608 /* else fall into the slow case */
1610 /* else fall into the slow case */
1613 # if defined(VGA_mips64) && defined(VGABI_N32)
1614 if (LIKELY(sizeof(void*) == 4 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1615 # else
1616 if (LIKELY(sizeof(void*) == 8 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1617 # endif
1619 SecMap* sm = get_secmap_for_reading(a);
1620 UWord sm_off = SM_OFF(a);
1621 UWord vabits8 = sm->vabits8[sm_off];
1622 if (LIKELY( !is_distinguished_sm(sm) &&
1623 (VA_BITS8_DEFINED == vabits8 ||
1624 VA_BITS8_UNDEFINED == vabits8) )) {
1625 /* Handle common case quickly: a is suitably aligned, */
1626 /* is mapped, and is addressible. */
1627 // Convert full V-bits in register to compact 2-bit form.
1628 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1629 sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1630 return;
1631 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1632 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1633 return;
1635 /* else fall into the slow case */
1637 /* else fall into the slow case */
1639 /* ------------ END semi-fast cases ------------ */
1641 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1643 /* Dump vbytes in memory, iterating from least to most significant
1644 byte. At the same time establish addressibility of the location. */
1645 for (i = 0; i < szB; i++) {
1646 PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
1647 ai = a + byte_offset_w(szB, bigendian, i);
1648 vbits8 = vbytes & 0xff;
1649 ok = set_vbits8(ai, vbits8);
1650 if (!ok) n_addrs_bad++;
1651 vbytes >>= 8;
1654 /* If an address error has happened, report it. */
1655 if (n_addrs_bad > 0)
1656 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1660 /*------------------------------------------------------------*/
1661 /*--- Setting permissions over address ranges. ---*/
1662 /*------------------------------------------------------------*/
1664 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1665 UWord dsm_num )
1667 UWord sm_off, sm_off16;
1668 UWord vabits2 = vabits16 & 0x3;
1669 SizeT lenA, lenB, len_to_next_secmap;
1670 Addr aNext;
1671 SecMap* sm;
1672 SecMap** sm_ptr;
1673 SecMap* example_dsm;
1675 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);
1677 /* Check the V+A bits make sense. */
1678 tl_assert(VA_BITS16_NOACCESS == vabits16 ||
1679 VA_BITS16_UNDEFINED == vabits16 ||
1680 VA_BITS16_DEFINED == vabits16);
1682 // This code should never write PDBs; ensure this. (See comment above
1683 // set_vabits2().)
1684 tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1686 if (lenT == 0)
1687 return;
1689 if (lenT > 256 * 1024 * 1024) {
1690 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1691 const HChar* s = "unknown???";
1692 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1693 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1694 if (vabits16 == VA_BITS16_DEFINED ) s = "defined";
1695 VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1696 "large range [0x%lx, 0x%lx) (%s)\n",
1697 a, a + lenT, s);
1701 #ifndef PERF_FAST_SARP
1702 /*------------------ debug-only case ------------------ */
1704 // Endianness doesn't matter here because all bytes are being set to
1705 // the same value.
1706 // Nb: We don't have to worry about updating the sec-V-bits table
1707 // after these set_vabits2() calls because this code never writes
1708 // VA_BITS2_PARTDEFINED values.
1709 SizeT i;
1710 for (i = 0; i < lenT; i++) {
1711 set_vabits2(a + i, vabits2);
1713 return;
1715 #endif
1717 /*------------------ standard handling ------------------ */
1719 /* Get the distinguished secondary that we might want
1720 to use (part of the space-compression scheme). */
1721 example_dsm = &sm_distinguished[dsm_num];
1723 // We have to handle ranges covering various combinations of partial and
1724 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1725 // Cases marked with a '*' are common.
1727 // TYPE PARTS USED
1728 // ---- ----------
1729 // * one partial sec-map (p) 1
1730 // - one whole sec-map (P) 2
1732 // * two partial sec-maps (pp) 1,3
1733 // - one partial, one whole sec-map (pP) 1,2
1734 // - one whole, one partial sec-map (Pp) 2,3
1735 // - two whole sec-maps (PP) 2,2
1737 // * one partial, one whole, one partial (pPp) 1,2,3
1738 // - one partial, two whole (pPP) 1,2,2
1739 // - two whole, one partial (PPp) 2,2,3
1740 // - three whole (PPP) 2,2,2
1742 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1743 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1744 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1745 // - N whole (PP...PP) 2,2...2,3
1747 // Break up total length (lenT) into two parts: length in the first
1748 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
1749 aNext = start_of_this_sm(a) + SM_SIZE;
1750 len_to_next_secmap = aNext - a;
1751 if ( lenT <= len_to_next_secmap ) {
1752 // Range entirely within one sec-map. Covers almost all cases.
1753 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
1754 lenA = lenT;
1755 lenB = 0;
1756 } else if (is_start_of_sm(a)) {
1757 // Range spans at least one whole sec-map, and starts at the beginning
1758 // of a sec-map; skip to Part 2.
1759 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
1760 lenA = 0;
1761 lenB = lenT;
1762 goto part2;
1763 } else {
1764 // Range spans two or more sec-maps, first one is partial.
1765 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
1766 lenA = len_to_next_secmap;
1767 lenB = lenT - lenA;
1770 //------------------------------------------------------------------------
1771 // Part 1: Deal with the first sec_map. Most of the time the range will be
1772 // entirely within a sec_map and this part alone will suffice. Also,
1773 // doing it this way lets us avoid repeatedly testing for the crossing of
1774 // a sec-map boundary within these loops.
1775 //------------------------------------------------------------------------
1777 // If it's distinguished, make it undistinguished if necessary.
1778 sm_ptr = get_secmap_ptr(a);
1779 if (is_distinguished_sm(*sm_ptr)) {
1780 if (*sm_ptr == example_dsm) {
1781 // Sec-map already has the V+A bits that we want, so skip.
1782 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
1783 a = aNext;
1784 lenA = 0;
1785 } else {
1786 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
1787 *sm_ptr = copy_for_writing(*sm_ptr);
1790 sm = *sm_ptr;
1792 // 1 byte steps
1793 while (True) {
1794 if (VG_IS_8_ALIGNED(a)) break;
1795 if (lenA < 1) break;
1796 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
1797 sm_off = SM_OFF(a);
1798 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1799 a += 1;
1800 lenA -= 1;
1802 // 8-aligned, 8 byte steps
1803 while (True) {
1804 if (lenA < 8) break;
1805 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
1806 sm_off16 = SM_OFF_16(a);
1807 sm->vabits16[sm_off16] = vabits16;
1808 a += 8;
1809 lenA -= 8;
1811 // 1 byte steps
1812 while (True) {
1813 if (lenA < 1) break;
1814 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
1815 sm_off = SM_OFF(a);
1816 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1817 a += 1;
1818 lenA -= 1;
1821 // We've finished the first sec-map. Is that it?
1822 if (lenB == 0)
1823 return;
1825 //------------------------------------------------------------------------
1826 // Part 2: Fast-set entire sec-maps at a time.
1827 //------------------------------------------------------------------------
1828 part2:
1829 // 64KB-aligned, 64KB steps.
1830 // Nb: we can reach here with lenB < SM_SIZE
1831 tl_assert(0 == lenA);
1832 while (True) {
1833 if (lenB < SM_SIZE) break;
1834 tl_assert(is_start_of_sm(a));
1835 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
1836 sm_ptr = get_secmap_ptr(a);
1837 if (!is_distinguished_sm(*sm_ptr)) {
1838 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
1839 // Free the non-distinguished sec-map that we're replacing. This
1840 // case happens moderately often, enough to be worthwhile.
1841 SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1842 tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1844 update_SM_counts(*sm_ptr, example_dsm);
1845 // Make the sec-map entry point to the example DSM
1846 *sm_ptr = example_dsm;
1847 lenB -= SM_SIZE;
1848 a += SM_SIZE;
1851 // We've finished the whole sec-maps. Is that it?
1852 if (lenB == 0)
1853 return;
1855 //------------------------------------------------------------------------
1856 // Part 3: Finish off the final partial sec-map, if necessary.
1857 //------------------------------------------------------------------------
1859 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1861 // If it's distinguished, make it undistinguished if necessary.
1862 sm_ptr = get_secmap_ptr(a);
1863 if (is_distinguished_sm(*sm_ptr)) {
1864 if (*sm_ptr == example_dsm) {
1865 // Sec-map already has the V+A bits that we want, so stop.
1866 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
1867 return;
1868 } else {
1869 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
1870 *sm_ptr = copy_for_writing(*sm_ptr);
1873 sm = *sm_ptr;
1875 // 8-aligned, 8 byte steps
1876 while (True) {
1877 if (lenB < 8) break;
1878 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
1879 sm_off16 = SM_OFF_16(a);
1880 sm->vabits16[sm_off16] = vabits16;
1881 a += 8;
1882 lenB -= 8;
1884 // 1 byte steps
1885 while (True) {
1886 if (lenB < 1) return;
1887 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
1888 sm_off = SM_OFF(a);
1889 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1890 a += 1;
1891 lenB -= 1;
1896 /* --- Set permissions for arbitrary address ranges --- */
1898 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1900 PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
1901 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1902 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1903 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1904 ocache_sarp_Clear_Origins ( a, len );
1907 static void make_mem_undefined ( Addr a, SizeT len )
1909 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
1910 DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1911 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1914 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1916 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
1917 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1918 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1919 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1920 ocache_sarp_Set_Origins ( a, len, otag );
1923 static
1924 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1925 ThreadId tid, UInt okind )
1927 UInt ecu;
1928 ExeContext* here;
1929 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1930 if it is invalid. So no need to do it here. */
1931 tl_assert(okind <= 3);
1932 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1933 tl_assert(here);
1934 ecu = VG_(get_ECU_from_ExeContext)(here);
1935 tl_assert(VG_(is_plausible_ECU)(ecu));
1936 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1939 static
1940 void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
1942 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1945 static
1946 void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
1948 MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1951 void MC_(make_mem_defined) ( Addr a, SizeT len )
1953 PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
1954 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1955 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1956 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1957 ocache_sarp_Clear_Origins ( a, len );
1960 __attribute__((unused))
1961 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
1963 MC_(make_mem_defined)(a, len);
1966 /* For each byte in [a,a+len), if the byte is addressable, make it be
1967 defined, but if it isn't addressible, leave it alone. In other
1968 words a version of MC_(make_mem_defined) that doesn't mess with
1969 addressibility. Low-performance implementation. */
1970 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1972 SizeT i;
1973 UChar vabits2;
1974 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1975 for (i = 0; i < len; i++) {
1976 vabits2 = get_vabits2( a+i );
1977 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1978 set_vabits2(a+i, VA_BITS2_DEFINED);
1979 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1980 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1986 /* Similarly (needed for mprotect handling ..) */
1987 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1989 SizeT i;
1990 UChar vabits2;
1991 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1992 for (i = 0; i < len; i++) {
1993 vabits2 = get_vabits2( a+i );
1994 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1995 set_vabits2(a+i, VA_BITS2_DEFINED);
1996 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1997 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
2003 /* --- Block-copy permissions (needed for implementing realloc() and
2004 sys_mremap). --- */
2006 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
2008 SizeT i, j;
2009 UChar vabits2, vabits8;
2010 Bool aligned, nooverlap;
2012 DEBUG("MC_(copy_address_range_state)\n");
2013 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);
2015 if (len == 0 || src == dst)
2016 return;
2018 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
2019 nooverlap = src+len <= dst || dst+len <= src;
2021 if (nooverlap && aligned) {
2023 /* Vectorised fast case, when no overlap and suitably aligned */
2024 /* vector loop */
2025 i = 0;
2026 while (len >= 4) {
2027 vabits8 = get_vabits8_for_aligned_word32( src+i );
2028 set_vabits8_for_aligned_word32( dst+i, vabits8 );
2029 if (LIKELY(VA_BITS8_DEFINED == vabits8
2030 || VA_BITS8_UNDEFINED == vabits8
2031 || VA_BITS8_NOACCESS == vabits8)) {
2032 /* do nothing */
2033 } else {
2034 /* have to copy secondary map info */
2035 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
2036 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
2037 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
2038 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
2039 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
2040 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
2041 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
2042 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
2044 i += 4;
2045 len -= 4;
2047 /* fixup loop */
2048 while (len >= 1) {
2049 vabits2 = get_vabits2( src+i );
2050 set_vabits2( dst+i, vabits2 );
2051 if (VA_BITS2_PARTDEFINED == vabits2) {
2052 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
2054 i++;
2055 len--;
2058 } else {
2060 /* We have to do things the slow way */
2061 if (src < dst) {
2062 for (i = 0, j = len-1; i < len; i++, j--) {
2063 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
2064 vabits2 = get_vabits2( src+j );
2065 set_vabits2( dst+j, vabits2 );
2066 if (VA_BITS2_PARTDEFINED == vabits2) {
2067 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
2072 if (src > dst) {
2073 for (i = 0; i < len; i++) {
2074 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
2075 vabits2 = get_vabits2( src+i );
2076 set_vabits2( dst+i, vabits2 );
2077 if (VA_BITS2_PARTDEFINED == vabits2) {
2078 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
2087 /*------------------------------------------------------------*/
2088 /*--- Origin tracking stuff - cache basics ---*/
2089 /*------------------------------------------------------------*/
2091 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
2092 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2094 Note that this implementation draws inspiration from the "origin
2095 tracking by value piggybacking" scheme described in "Tracking Bad
2096 Apples: Reporting the Origin of Null and Undefined Value Errors"
2097 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
2098 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
2099 implemented completely differently.
2101 Origin tags and ECUs -- about the shadow values
2102 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2104 This implementation tracks the defining point of all uninitialised
2105 values using so called "origin tags", which are 32-bit integers,
2106 rather than using the values themselves to encode the origins. The
2107 latter, so-called value piggybacking", is what the OOPSLA07 paper
2108 describes.
2110 Origin tags, as tracked by the machinery below, are 32-bit unsigned
2111 ints (UInts), regardless of the machine's word size. Each tag
2112 comprises an upper 30-bit ECU field and a lower 2-bit
2113 'kind' field. The ECU field is a number given out by m_execontext
2114 and has a 1-1 mapping with ExeContext*s. An ECU can be used
2115 directly as an origin tag (otag), but in fact we want to put
2116 additional information 'kind' field to indicate roughly where the
2117 tag came from. This helps print more understandable error messages
2118 for the user -- it has no other purpose. In summary:
2120 * Both ECUs and origin tags are represented as 32-bit words
2122 * m_execontext and the core-tool interface deal purely in ECUs.
2123 They have no knowledge of origin tags - that is a purely
2124 Memcheck-internal matter.
2126 * all valid ECUs have the lowest 2 bits zero and at least
2127 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2129 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2130 constants defined in mc_include.h.
2132 * to convert an otag back to an ECU, AND it with ~3
2134 One important fact is that no valid otag is zero. A zero otag is
2135 used by the implementation to indicate "no origin", which could
2136 mean that either the value is defined, or it is undefined but the
2137 implementation somehow managed to lose the origin.
2139 The ECU used for memory created by malloc etc is derived from the
2140 stack trace at the time the malloc etc happens. This means the
2141 mechanism can show the exact allocation point for heap-created
2142 uninitialised values.
2144 In contrast, it is simply too expensive to create a complete
2145 backtrace for each stack allocation. Therefore we merely use a
2146 depth-1 backtrace for stack allocations, which can be done once at
2147 translation time, rather than N times at run time. The result of
2148 this is that, for stack created uninitialised values, Memcheck can
2149 only show the allocating function, and not what called it.
2150 Furthermore, compilers tend to move the stack pointer just once at
2151 the start of the function, to allocate all locals, and so in fact
2152 the stack origin almost always simply points to the opening brace
2153 of the function. Net result is, for stack origins, the mechanism
2154 can tell you in which function the undefined value was created, but
2155 that's all. Users will need to carefully check all locals in the
2156 specified function.
2158 Shadowing registers and memory
2159 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2161 Memory is shadowed using a two level cache structure (ocacheL1 and
2162 ocacheL2). Memory references are first directed to ocacheL1. This
2163 is a traditional 2-way set associative cache with 32-byte lines and
2164 approximate LRU replacement within each set.
2166 A naive implementation would require storing one 32 bit otag for
2167 each byte of memory covered, a 4:1 space overhead. Instead, there
2168 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2169 that shows which of the 4 bytes have that shadow value and which
2170 have a shadow value of zero (indicating no origin). Hence a lot of
2171 space is saved, but the cost is that only one different origin per
2172 4 bytes of address space can be represented. This is a source of
2173 imprecision, but how much of a problem it really is remains to be
2174 seen.
2176 A cache line that contains all zeroes ("no origins") contains no
2177 useful information, and can be ejected from the L1 cache "for
2178 free", in the sense that a read miss on the L1 causes a line of
2179 zeroes to be installed. However, ejecting a line containing
2180 nonzeroes risks losing origin information permanently. In order to
2181 prevent such lossage, ejected nonzero lines are placed in a
2182 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2183 lines. This can grow arbitrarily large, and so should ensure that
2184 Memcheck runs out of memory in preference to losing useful origin
2185 info due to cache size limitations.
2187 Shadowing registers is a bit tricky, because the shadow values are
2188 32 bits, regardless of the size of the register. That gives a
2189 problem for registers smaller than 32 bits. The solution is to
2190 find spaces in the guest state that are unused, and use those to
2191 shadow guest state fragments smaller than 32 bits. For example, on
2192 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
2193 shadow are allocated for the register's otag, then there are still
2194 12 bytes left over which could be used to shadow 3 other values.
2196 This implies there is some non-obvious mapping from guest state
2197 (start,length) pairs to the relevant shadow offset (for the origin
2198 tags). And it is unfortunately guest-architecture specific. The
2199 mapping is contained in mc_machine.c, which is quite lengthy but
2200 straightforward.
2202 Instrumenting the IR
2203 ~~~~~~~~~~~~~~~~~~~~
2205 Instrumentation is largely straightforward, and done by the
2206 functions schemeE and schemeS in mc_translate.c. These generate
2207 code for handling the origin tags of expressions (E) and statements
2208 (S) respectively. The rather strange names are a reference to the
2209 "compilation schemes" shown in Simon Peyton Jones' book "The
2210 Implementation of Functional Programming Languages" (Prentice Hall,
2211 1987, see
2212 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2214 schemeS merely arranges to move shadow values around the guest
2215 state to track the incoming IR. schemeE is largely trivial too.
2216 The only significant point is how to compute the otag corresponding
2217 to binary (or ternary, quaternary, etc) operator applications. The
2218 rule is simple: just take whichever value is larger (32-bit
2219 unsigned max). Constants get the special value zero. Hence this
2220 rule always propagates a nonzero (known) otag in preference to a
2221 zero (unknown, or more likely, value-is-defined) tag, as we want.
2222 If two different undefined values are inputs to a binary operator
2223 application, then which is propagated is arbitrary, but that
2224 doesn't matter, since the program is erroneous in using either of
2225 the values, and so there's no point in attempting to propagate
2226 both.
2228 Since constants are abstracted to (otag) zero, much of the
2229 instrumentation code can be folded out without difficulty by the
2230 generic post-instrumentation IR cleanup pass, using these rules:
2231 Max32U(0,x) -> x, Max32U(x,0) -> x, Max32(x,y) where x and y are
2232 constants is evaluated at JIT time. And the resulting dead code
2233 removal. In practice this causes surprisingly few Max32Us to
2234 survive through to backend code generation.
2236 Integration with the V-bits machinery
2237 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2239 This is again largely straightforward. Mostly the otag and V bits
2240 stuff are independent. The only point of interaction is when the V
2241 bits instrumenter creates a call to a helper function to report an
2242 uninitialised value error -- in that case it must first use schemeE
2243 to get hold of the origin tag expression for the value, and pass
2244 that to the helper too.
2246 There is the usual stuff to do with setting address range
2247 permissions. When memory is painted undefined, we must also know
2248 the origin tag to paint with, which involves some tedious plumbing,
2249 particularly to do with the fast case stack handlers. When memory
2250 is painted defined or noaccess then the origin tags must be forced
2251 to zero.
2253 One of the goals of the implementation was to ensure that the
2254 non-origin tracking mode isn't slowed down at all. To do this,
2255 various functions to do with memory permissions setting (again,
2256 mostly pertaining to the stack) are duplicated for the with- and
2257 without-otag case.
2259 Dealing with stack redzones, and the NIA cache
2260 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2262 This is one of the few non-obvious parts of the implementation.
2264 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2265 reserved area below the stack pointer, that can be used as scratch
2266 space by compiler generated code for functions. In the Memcheck
2267 sources this is referred to as the "stack redzone". The important
2268 thing here is that such redzones are considered volatile across
2269 function calls and returns. So Memcheck takes care to mark them as
2270 undefined for each call and return, on the afflicted platforms.
2271 Past experience shows this is essential in order to get reliable
2272 messages about uninitialised values that come from the stack.
2274 So the question is, when we paint a redzone undefined, what origin
2275 tag should we use for it? Consider a function f() calling g(). If
2276 we paint the redzone using an otag derived from the ExeContext of
2277 the CALL/BL instruction in f, then any errors in g causing it to
2278 use uninitialised values that happen to lie in the redzone, will be
2279 reported as having their origin in f. Which is highly confusing.
2281 The same applies for returns: if, on a return, we paint the redzone
2282 using a origin tag derived from the ExeContext of the RET/BLR
2283 instruction in g, then any later errors in f causing it to use
2284 uninitialised values in the redzone, will be reported as having
2285 their origin in g. Which is just as confusing.
2287 To do it right, in both cases we need to use an origin tag which
2288 pertains to the instruction which dynamically follows the CALL/BL
2289 or RET/BLR. In short, one derived from the NIA - the "next
2290 instruction address".
2292 To make this work, Memcheck's redzone-painting helper,
2293 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2294 NIA. It converts the NIA to a 1-element ExeContext, and uses that
2295 ExeContext's ECU as the basis for the otag used to paint the
2296 redzone. The expensive part of this is converting an NIA into an
2297 ECU, since this happens once for every call and every return. So
2298 we use a simple 511-line, 2-way set associative cache
2299 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2300 the cost out.
2302 Further background comments
2303 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2305 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
2306 > it really just the address of the relevant ExeContext?
2308 Well, it's not the address, but a value which has a 1-1 mapping
2309 with ExeContexts, and is guaranteed not to be zero, since zero
2310 denotes (to memcheck) "unknown origin or defined value". So these
2311 UInts are just numbers starting at 4 and incrementing by 4; each
2312 ExeContext is given a number when it is created. (*** NOTE this
2313 confuses otags and ECUs; see comments above ***).
2315 Making these otags 32-bit regardless of the machine's word size
2316 makes the 64-bit implementation easier (next para). And it doesn't
2317 really limit us in any way, since for the tags to overflow would
2318 require that the program somehow caused 2^30-1 different
2319 ExeContexts to be created, in which case it is probably in deep
2320 trouble. Not to mention V will have soaked up many tens of
2321 gigabytes of memory merely to store them all.
2323 So having 64-bit origins doesn't really buy you anything, and has
2324 the following downsides:
2326 Suppose that instead, an otag is a UWord. This would mean that, on
2327 a 64-bit target,
2329 1. It becomes hard to shadow any element of guest state which is
2330 smaller than 8 bytes. To do so means you'd need to find some
2331 8-byte-sized hole in the guest state which you don't want to
2332 shadow, and use that instead to hold the otag. On ppc64, the
2333 condition code register(s) are split into 20 UChar sized pieces,
2334 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2335 and so that would entail finding 160 bytes somewhere else in the
2336 guest state.
2338 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2339 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2340 same) and so I had to look for 4 untracked otag-sized areas in
2341 the guest state to make that possible.
2343 The same problem exists of course when origin tags are only 32
2344 bits, but it's less extreme.
2346 2. (More compelling) it doubles the size of the origin shadow
2347 memory. Given that the shadow memory is organised as a fixed
2348 size cache, and that accuracy of tracking is limited by origins
2349 falling out the cache due to space conflicts, this isn't good.
2351 > Another question: is the origin tracking perfect, or are there
2352 > cases where it fails to determine an origin?
2354 It is imperfect for at least for the following reasons, and
2355 probably more:
2357 * Insufficient capacity in the origin cache. When a line is
2358 evicted from the cache it is gone forever, and so subsequent
2359 queries for the line produce zero, indicating no origin
2360 information. Interestingly, a line containing all zeroes can be
2361 evicted "free" from the cache, since it contains no useful
2362 information, so there is scope perhaps for some cleverer cache
2363 management schemes. (*** NOTE, with the introduction of the
2364 second level origin tag cache, ocacheL2, this is no longer a
2365 problem. ***)
2367 * The origin cache only stores one otag per 32-bits of address
2368 space, plus 4 bits indicating which of the 4 bytes has that tag
2369 and which are considered defined. The result is that if two
2370 undefined bytes in the same word are stored in memory, the first
2371 stored byte's origin will be lost and replaced by the origin for
2372 the second byte.
2374 * Nonzero origin tags for defined values. Consider a binary
2375 operator application op(x,y). Suppose y is undefined (and so has
2376 a valid nonzero origin tag), and x is defined, but erroneously
2377 has a nonzero origin tag (defined values should have tag zero).
2378 If the erroneous tag has a numeric value greater than y's tag,
2379 then the rule for propagating origin tags though binary
2380 operations, which is simply to take the unsigned max of the two
2381 tags, will erroneously propagate x's tag rather than y's.
2383 * Some obscure uses of x86/amd64 byte registers can cause lossage
2384 or confusion of origins. %AH .. %DH are treated as different
2385 from, and unrelated to, their parent registers, %EAX .. %EDX.
2386 So some weird sequences like
2388 movb undefined-value, %AH
2389 movb defined-value, %AL
2390 .. use %AX or %EAX ..
2392 will cause the origin attributed to %AH to be ignored, since %AL,
2393 %AX, %EAX are treated as the same register, and %AH as a
2394 completely separate one.
2396 But having said all that, it actually seems to work fairly well in
2397 practice.
2400 static UWord stats_ocacheL1_find = 0;
2401 static UWord stats_ocacheL1_found_at_1 = 0;
2402 static UWord stats_ocacheL1_found_at_N = 0;
2403 static UWord stats_ocacheL1_misses = 0;
2404 static UWord stats_ocacheL1_lossage = 0;
2405 static UWord stats_ocacheL1_movefwds = 0;
2407 static UWord stats__ocacheL2_refs = 0;
2408 static UWord stats__ocacheL2_misses = 0;
2409 static UWord stats__ocacheL2_n_nodes_max = 0;
2411 /* Cache of 32-bit values, one every 32 bits of address space */
2413 #define OC_BITS_PER_LINE 5
2414 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2416 static INLINE UWord oc_line_offset ( Addr a ) {
2417 return (a >> 2) & (OC_W32S_PER_LINE - 1);
2419 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2420 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2423 #define OC_LINES_PER_SET 2
2425 #define OC_N_SET_BITS 20
2426 #define OC_N_SETS (1 << OC_N_SET_BITS)
2428 /* These settings give:
2429 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2430 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
2433 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2436 typedef
2437 struct {
2438 Addr tag;
2439 UInt w32[OC_W32S_PER_LINE];
2440 UChar descr[OC_W32S_PER_LINE];
2442 OCacheLine;
2444 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2445 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2446 and 'z' if all the represented tags are zero. */
2447 static UChar classify_OCacheLine ( OCacheLine* line )
2449 UWord i;
2450 if (line->tag == 1/*invalid*/)
2451 return 'e'; /* EMPTY */
2452 tl_assert(is_valid_oc_tag(line->tag));
2453 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2454 tl_assert(0 == ((~0xF) & line->descr[i]));
2455 if (line->w32[i] > 0 && line->descr[i] > 0)
2456 return 'n'; /* NONZERO - contains useful info */
2458 return 'z'; /* ZERO - no useful info */
2461 typedef
2462 struct {
2463 OCacheLine line[OC_LINES_PER_SET];
2465 OCacheSet;
2467 typedef
2468 struct {
2469 OCacheSet set[OC_N_SETS];
2471 OCache;
2473 static OCache* ocacheL1 = NULL;
2474 static UWord ocacheL1_event_ctr = 0;
2476 static void init_ocacheL2 ( void ); /* fwds */
2477 static void init_OCache ( void )
2479 UWord line, set;
2480 tl_assert(MC_(clo_mc_level) >= 3);
2481 tl_assert(ocacheL1 == NULL);
2482 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2483 if (ocacheL1 == NULL) {
2484 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2485 sizeof(OCache) );
2487 tl_assert(ocacheL1 != NULL);
2488 for (set = 0; set < OC_N_SETS; set++) {
2489 for (line = 0; line < OC_LINES_PER_SET; line++) {
2490 ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2493 init_ocacheL2();
2496 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2498 OCacheLine tmp;
2499 stats_ocacheL1_movefwds++;
2500 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2501 tmp = set->line[lineno-1];
2502 set->line[lineno-1] = set->line[lineno];
2503 set->line[lineno] = tmp;
2506 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2507 UWord i;
2508 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2509 line->w32[i] = 0; /* NO ORIGIN */
2510 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2512 line->tag = tag;
2515 //////////////////////////////////////////////////////////////
2516 //// OCache backing store
2518 static OSet* ocacheL2 = NULL;
2520 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2521 return VG_(malloc)(cc, szB);
2523 static void ocacheL2_free ( void* v ) {
2524 VG_(free)( v );
2527 /* Stats: # nodes currently in tree */
2528 static UWord stats__ocacheL2_n_nodes = 0;
2530 static void init_ocacheL2 ( void )
2532 tl_assert(!ocacheL2);
2533 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2534 tl_assert(0 == offsetof(OCacheLine,tag));
2535 ocacheL2
2536 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2537 NULL, /* fast cmp */
2538 ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2539 stats__ocacheL2_n_nodes = 0;
2542 /* Find line with the given tag in the tree, or NULL if not found. */
2543 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2545 OCacheLine* line;
2546 tl_assert(is_valid_oc_tag(tag));
2547 stats__ocacheL2_refs++;
2548 line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2549 return line;
2552 /* Delete the line with the given tag from the tree, if it is present, and
2553 free up the associated memory. */
2554 static void ocacheL2_del_tag ( Addr tag )
2556 OCacheLine* line;
2557 tl_assert(is_valid_oc_tag(tag));
2558 stats__ocacheL2_refs++;
2559 line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2560 if (line) {
2561 VG_(OSetGen_FreeNode)(ocacheL2, line);
2562 tl_assert(stats__ocacheL2_n_nodes > 0);
2563 stats__ocacheL2_n_nodes--;
2567 /* Add a copy of the given line to the tree. It must not already be
2568 present. */
2569 static void ocacheL2_add_line ( OCacheLine* line )
2571 OCacheLine* copy;
2572 tl_assert(is_valid_oc_tag(line->tag));
2573 copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2574 *copy = *line;
2575 stats__ocacheL2_refs++;
2576 VG_(OSetGen_Insert)( ocacheL2, copy );
2577 stats__ocacheL2_n_nodes++;
2578 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2579 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2582 ////
2583 //////////////////////////////////////////////////////////////
2585 __attribute__((noinline))
2586 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2588 OCacheLine *victim, *inL2;
2589 UChar c;
2590 UWord line;
2591 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2592 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2593 UWord tag = a & tagmask;
2594 tl_assert(setno >= 0 && setno < OC_N_SETS);
2596 /* we already tried line == 0; skip therefore. */
2597 for (line = 1; line < OC_LINES_PER_SET; line++) {
2598 if (ocacheL1->set[setno].line[line].tag == tag) {
2599 if (line == 1) {
2600 stats_ocacheL1_found_at_1++;
2601 } else {
2602 stats_ocacheL1_found_at_N++;
2604 if (UNLIKELY(0 == (ocacheL1_event_ctr++
2605 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2606 moveLineForwards( &ocacheL1->set[setno], line );
2607 line--;
2609 return &ocacheL1->set[setno].line[line];
2613 /* A miss. Use the last slot. Implicitly this means we're
2614 ejecting the line in the last slot. */
2615 stats_ocacheL1_misses++;
2616 tl_assert(line == OC_LINES_PER_SET);
2617 line--;
2618 tl_assert(line > 0);
2620 /* First, move the to-be-ejected line to the L2 cache. */
2621 victim = &ocacheL1->set[setno].line[line];
2622 c = classify_OCacheLine(victim);
2623 switch (c) {
2624 case 'e':
2625 /* the line is empty (has invalid tag); ignore it. */
2626 break;
2627 case 'z':
2628 /* line contains zeroes. We must ensure the backing store is
2629 updated accordingly, either by copying the line there
2630 verbatim, or by ensuring it isn't present there. We
2631 chosse the latter on the basis that it reduces the size of
2632 the backing store. */
2633 ocacheL2_del_tag( victim->tag );
2634 break;
2635 case 'n':
2636 /* line contains at least one real, useful origin. Copy it
2637 to the backing store. */
2638 stats_ocacheL1_lossage++;
2639 inL2 = ocacheL2_find_tag( victim->tag );
2640 if (inL2) {
2641 *inL2 = *victim;
2642 } else {
2643 ocacheL2_add_line( victim );
2645 break;
2646 default:
2647 tl_assert(0);
2650 /* Now we must reload the L1 cache from the backing tree, if
2651 possible. */
2652 tl_assert(tag != victim->tag); /* stay sane */
2653 inL2 = ocacheL2_find_tag( tag );
2654 if (inL2) {
2655 /* We're in luck. It's in the L2. */
2656 ocacheL1->set[setno].line[line] = *inL2;
2657 } else {
2658 /* Missed at both levels of the cache hierarchy. We have to
2659 declare it as full of zeroes (unknown origins). */
2660 stats__ocacheL2_misses++;
2661 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2664 /* Move it one forwards */
2665 moveLineForwards( &ocacheL1->set[setno], line );
2666 line--;
2668 return &ocacheL1->set[setno].line[line];
2671 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2673 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2674 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2675 UWord tag = a & tagmask;
2677 stats_ocacheL1_find++;
2679 if (OC_ENABLE_ASSERTIONS) {
2680 tl_assert(setno >= 0 && setno < OC_N_SETS);
2681 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2684 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2685 return &ocacheL1->set[setno].line[0];
2688 return find_OCacheLine_SLOW( a );
2691 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2693 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2694 //// Set the origins for a+0 .. a+7
2695 { OCacheLine* line;
2696 UWord lineoff = oc_line_offset(a);
2697 if (OC_ENABLE_ASSERTIONS) {
2698 tl_assert(lineoff >= 0
2699 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2701 line = find_OCacheLine( a );
2702 line->descr[lineoff+0] = 0xF;
2703 line->descr[lineoff+1] = 0xF;
2704 line->w32[lineoff+0] = otag;
2705 line->w32[lineoff+1] = otag;
2707 //// END inlined, specialised version of MC_(helperc_b_store8)
2711 /*------------------------------------------------------------*/
2712 /*--- Aligned fast case permission setters, ---*/
2713 /*--- for dealing with stacks ---*/
2714 /*------------------------------------------------------------*/
2716 /*--------------------- 32-bit ---------------------*/
2718 /* Nb: by "aligned" here we mean 4-byte aligned */
2720 static INLINE void make_aligned_word32_undefined ( Addr a )
2722 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);
2724 #ifndef PERF_FAST_STACK2
2725 make_mem_undefined(a, 4);
2726 #else
2728 UWord sm_off;
2729 SecMap* sm;
2731 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2732 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
2733 make_mem_undefined(a, 4);
2734 return;
2737 sm = get_secmap_for_writing_low(a);
2738 sm_off = SM_OFF(a);
2739 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2741 #endif
2744 static INLINE
2745 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2747 make_aligned_word32_undefined(a);
2748 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2749 //// Set the origins for a+0 .. a+3
2750 { OCacheLine* line;
2751 UWord lineoff = oc_line_offset(a);
2752 if (OC_ENABLE_ASSERTIONS) {
2753 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2755 line = find_OCacheLine( a );
2756 line->descr[lineoff] = 0xF;
2757 line->w32[lineoff] = otag;
2759 //// END inlined, specialised version of MC_(helperc_b_store4)
2762 static INLINE
2763 void make_aligned_word32_noaccess ( Addr a )
2765 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);
2767 #ifndef PERF_FAST_STACK2
2768 MC_(make_mem_noaccess)(a, 4);
2769 #else
2771 UWord sm_off;
2772 SecMap* sm;
2774 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2775 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
2776 MC_(make_mem_noaccess)(a, 4);
2777 return;
2780 sm = get_secmap_for_writing_low(a);
2781 sm_off = SM_OFF(a);
2782 sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2784 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2785 //// Set the origins for a+0 .. a+3.
2786 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2787 OCacheLine* line;
2788 UWord lineoff = oc_line_offset(a);
2789 if (OC_ENABLE_ASSERTIONS) {
2790 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2792 line = find_OCacheLine( a );
2793 line->descr[lineoff] = 0;
2795 //// END inlined, specialised version of MC_(helperc_b_store4)
2797 #endif
2800 /*--------------------- 64-bit ---------------------*/
2802 /* Nb: by "aligned" here we mean 8-byte aligned */
2804 static INLINE void make_aligned_word64_undefined ( Addr a )
2806 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);
2808 #ifndef PERF_FAST_STACK2
2809 make_mem_undefined(a, 8);
2810 #else
2812 UWord sm_off16;
2813 SecMap* sm;
2815 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2816 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
2817 make_mem_undefined(a, 8);
2818 return;
2821 sm = get_secmap_for_writing_low(a);
2822 sm_off16 = SM_OFF_16(a);
2823 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
2825 #endif
2828 static INLINE
2829 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2831 make_aligned_word64_undefined(a);
2832 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2833 //// Set the origins for a+0 .. a+7
2834 { OCacheLine* line;
2835 UWord lineoff = oc_line_offset(a);
2836 tl_assert(lineoff >= 0
2837 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2838 line = find_OCacheLine( a );
2839 line->descr[lineoff+0] = 0xF;
2840 line->descr[lineoff+1] = 0xF;
2841 line->w32[lineoff+0] = otag;
2842 line->w32[lineoff+1] = otag;
2844 //// END inlined, specialised version of MC_(helperc_b_store8)
2847 static INLINE
2848 void make_aligned_word64_noaccess ( Addr a )
2850 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);
2852 #ifndef PERF_FAST_STACK2
2853 MC_(make_mem_noaccess)(a, 8);
2854 #else
2856 UWord sm_off16;
2857 SecMap* sm;
2859 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2860 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
2861 MC_(make_mem_noaccess)(a, 8);
2862 return;
2865 sm = get_secmap_for_writing_low(a);
2866 sm_off16 = SM_OFF_16(a);
2867 sm->vabits16[sm_off16] = VA_BITS16_NOACCESS;
2869 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2870 //// Clear the origins for a+0 .. a+7.
2871 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2872 OCacheLine* line;
2873 UWord lineoff = oc_line_offset(a);
2874 tl_assert(lineoff >= 0
2875 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2876 line = find_OCacheLine( a );
2877 line->descr[lineoff+0] = 0;
2878 line->descr[lineoff+1] = 0;
2880 //// END inlined, specialised version of MC_(helperc_b_store8)
2882 #endif
2886 /*------------------------------------------------------------*/
2887 /*--- Stack pointer adjustment ---*/
2888 /*------------------------------------------------------------*/
2890 #ifdef PERF_FAST_STACK
2891 # define MAYBE_USED
2892 #else
2893 # define MAYBE_USED __attribute__((unused))
2894 #endif
2896 /*--------------- adjustment by 4 bytes ---------------*/
2898 MAYBE_USED
2899 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2901 UInt otag = ecu | MC_OKIND_STACK;
2902 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
2903 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2904 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2905 } else {
2906 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2910 MAYBE_USED
2911 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2913 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
2914 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2915 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2916 } else {
2917 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2921 MAYBE_USED
2922 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2924 PROF_EVENT(MCPE_DIE_MEM_STACK_4);
2925 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2926 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2927 } else {
2928 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2932 /*--------------- adjustment by 8 bytes ---------------*/
2934 MAYBE_USED
2935 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2937 UInt otag = ecu | MC_OKIND_STACK;
2938 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
2939 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2940 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2941 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2942 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2943 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2944 } else {
2945 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2949 MAYBE_USED
2950 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2952 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
2953 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2954 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2955 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2956 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2957 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2958 } else {
2959 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2963 MAYBE_USED
2964 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2966 PROF_EVENT(MCPE_DIE_MEM_STACK_8);
2967 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2968 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2969 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2970 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2971 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2972 } else {
2973 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2977 /*--------------- adjustment by 12 bytes ---------------*/
2979 MAYBE_USED
2980 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2982 UInt otag = ecu | MC_OKIND_STACK;
2983 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
2984 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2985 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2986 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2987 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2988 /* from previous test we don't have 8-alignment at offset +0,
2989 hence must have 8 alignment at offsets +4/-4. Hence safe to
2990 do 4 at +0 and then 8 at +4/. */
2991 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2992 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2993 } else {
2994 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2998 MAYBE_USED
2999 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
3001 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
3002 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3003 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3004 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3005 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3006 /* from previous test we don't have 8-alignment at offset +0,
3007 hence must have 8 alignment at offsets +4/-4. Hence safe to
3008 do 4 at +0 and then 8 at +4/. */
3009 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3010 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3011 } else {
3012 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
3016 MAYBE_USED
3017 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
3019 PROF_EVENT(MCPE_DIE_MEM_STACK_12);
3020 /* Note the -12 in the test */
3021 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
3022 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
3023 -4. */
3024 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3025 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3026 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3027 /* We have 4-alignment at +0, but we don't have 8-alignment at
3028 -12. So we must have 8-alignment at -8. Hence do 4 at -12
3029 and then 8 at -8. */
3030 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3031 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3032 } else {
3033 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
3037 /*--------------- adjustment by 16 bytes ---------------*/
3039 MAYBE_USED
3040 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
3042 UInt otag = ecu | MC_OKIND_STACK;
3043 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
3044 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3045 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3046 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3047 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3048 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3049 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3050 Hence do 4 at +0, 8 at +4, 4 at +12. */
3051 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3052 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3053 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3054 } else {
3055 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
3059 MAYBE_USED
3060 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
3062 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
3063 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3064 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3065 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3066 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3067 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3068 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3069 Hence do 4 at +0, 8 at +4, 4 at +12. */
3070 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3071 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3072 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3073 } else {
3074 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
3078 MAYBE_USED
3079 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
3081 PROF_EVENT(MCPE_DIE_MEM_STACK_16);
3082 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3083 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
3084 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3085 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3086 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3087 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
3088 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3089 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3090 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3091 } else {
3092 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
3096 /*--------------- adjustment by 32 bytes ---------------*/
3098 MAYBE_USED
3099 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
3101 UInt otag = ecu | MC_OKIND_STACK;
3102 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3103 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3104 /* Straightforward */
3105 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3106 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3107 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3108 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3109 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3110 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3111 +0,+28. */
3112 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3113 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3114 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3115 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
3116 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
3117 } else {
3118 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
3122 MAYBE_USED
3123 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
3125 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3126 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3127 /* Straightforward */
3128 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3129 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3130 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3131 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3132 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3133 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3134 +0,+28. */
3135 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3136 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3137 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3138 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3139 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3140 } else {
3141 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3145 MAYBE_USED
3146 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3148 PROF_EVENT(MCPE_DIE_MEM_STACK_32);
3149 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3150 /* Straightforward */
3151 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3152 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3153 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3154 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3155 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3156 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
3157 4 at -32,-4. */
3158 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3159 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3160 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3161 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3162 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3163 } else {
3164 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3168 /*--------------- adjustment by 112 bytes ---------------*/
3170 MAYBE_USED
3171 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3173 UInt otag = ecu | MC_OKIND_STACK;
3174 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3175 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3176 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3177 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3178 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3179 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3180 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3181 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3182 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3183 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3184 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3185 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3186 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3187 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3188 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3189 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3190 } else {
3191 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3195 MAYBE_USED
3196 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3198 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3199 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3200 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3201 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3202 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3203 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3204 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3205 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3206 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3207 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3208 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3209 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3210 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3211 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3212 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3213 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3214 } else {
3215 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3219 MAYBE_USED
3220 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3222 PROF_EVENT(MCPE_DIE_MEM_STACK_112);
3223 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3224 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3225 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3226 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3227 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3228 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3229 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3230 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3231 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3232 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3233 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3234 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3235 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3236 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3237 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3238 } else {
3239 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3243 /*--------------- adjustment by 128 bytes ---------------*/
3245 MAYBE_USED
3246 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3248 UInt otag = ecu | MC_OKIND_STACK;
3249 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3250 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3251 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3252 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3253 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3254 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3255 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3256 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3257 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3258 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3259 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3260 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3261 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3262 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3263 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3264 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3265 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3266 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3267 } else {
3268 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3272 MAYBE_USED
3273 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3275 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3276 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3277 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3278 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3279 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3280 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3281 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3282 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3283 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3284 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3285 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3286 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3287 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3288 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3289 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3290 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3291 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3292 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3293 } else {
3294 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3298 MAYBE_USED
3299 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3301 PROF_EVENT(MCPE_DIE_MEM_STACK_128);
3302 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3303 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3304 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3305 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3306 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3307 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3308 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3309 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3310 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3311 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3312 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3313 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3314 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3315 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3316 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3317 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3318 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3319 } else {
3320 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3324 /*--------------- adjustment by 144 bytes ---------------*/
3326 MAYBE_USED
3327 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3329 UInt otag = ecu | MC_OKIND_STACK;
3330 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3331 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3332 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3333 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3334 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3335 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3336 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3337 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3338 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3339 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3340 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3341 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3342 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3343 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3344 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3345 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3346 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3347 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3348 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3349 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3350 } else {
3351 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3355 MAYBE_USED
3356 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3358 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3359 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3360 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3361 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3362 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3363 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3364 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3365 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3366 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3367 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3368 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3369 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3370 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3371 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3372 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3373 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3374 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3375 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3376 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3377 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3378 } else {
3379 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3383 MAYBE_USED
3384 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3386 PROF_EVENT(MCPE_DIE_MEM_STACK_144);
3387 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3388 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3389 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3390 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3391 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3392 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3393 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3394 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3395 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3396 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3397 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3398 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3399 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3400 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3401 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3402 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3403 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3404 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3405 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3406 } else {
3407 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3411 /*--------------- adjustment by 160 bytes ---------------*/
3413 MAYBE_USED
3414 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3416 UInt otag = ecu | MC_OKIND_STACK;
3417 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3418 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3419 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3420 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3421 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3422 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3423 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3424 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3425 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3426 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3427 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3428 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3429 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3430 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3431 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3432 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3433 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3434 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3435 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3436 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3437 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3438 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3439 } else {
3440 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3444 MAYBE_USED
3445 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3447 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3448 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3449 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3450 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3451 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3452 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3453 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3454 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3455 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3456 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3457 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3458 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3459 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3460 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3461 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3462 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3463 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3464 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3465 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3466 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3467 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3468 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3469 } else {
3470 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3474 MAYBE_USED
3475 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3477 PROF_EVENT(MCPE_DIE_MEM_STACK_160);
3478 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3479 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3480 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3481 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3482 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3483 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3484 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3485 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3486 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3487 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3488 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3489 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3490 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3491 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3492 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3493 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3494 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3495 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3496 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3497 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3498 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3499 } else {
3500 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3504 /*--------------- adjustment by N bytes ---------------*/
3506 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3508 UInt otag = ecu | MC_OKIND_STACK;
3509 PROF_EVENT(MCPE_NEW_MEM_STACK);
3510 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3513 static void mc_new_mem_stack ( Addr a, SizeT len )
3515 PROF_EVENT(MCPE_NEW_MEM_STACK);
3516 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3519 static void mc_die_mem_stack ( Addr a, SizeT len )
3521 PROF_EVENT(MCPE_DIE_MEM_STACK);
3522 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3526 /* The AMD64 ABI says:
3528 "The 128-byte area beyond the location pointed to by %rsp is considered
3529 to be reserved and shall not be modified by signal or interrupt
3530 handlers. Therefore, functions may use this area for temporary data
3531 that is not needed across function calls. In particular, leaf functions
3532 may use this area for their entire stack frame, rather than adjusting
3533 the stack pointer in the prologue and epilogue. This area is known as
3534 red zone [sic]."
3536 So after any call or return we need to mark this redzone as containing
3537 undefined values.
3539 Consider this: we're in function f. f calls g. g moves rsp down
3540 modestly (say 16 bytes) and writes stuff all over the red zone, making it
3541 defined. g returns. f is buggy and reads from parts of the red zone
3542 that it didn't write on. But because g filled that area in, f is going
3543 to be picking up defined V bits and so any errors from reading bits of
3544 the red zone it didn't write, will be missed. The only solution I could
3545 think of was to make the red zone undefined when g returns to f.
3547 This is in accordance with the ABI, which makes it clear the redzone
3548 is volatile across function calls.
3550 The problem occurs the other way round too: f could fill the RZ up
3551 with defined values and g could mistakenly read them. So the RZ
3552 also needs to be nuked on function calls.
3556 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3557 improved so as to have a lower miss rate. */
3559 static UWord stats__nia_cache_queries = 0;
3560 static UWord stats__nia_cache_misses = 0;
3562 typedef
3563 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
3564 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3565 WCacheEnt;
3567 #define N_NIA_TO_ECU_CACHE 511
3569 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3571 static void init_nia_to_ecu_cache ( void )
3573 UWord i;
3574 Addr zero_addr = 0;
3575 ExeContext* zero_ec;
3576 UInt zero_ecu;
3577 /* Fill all the slots with an entry for address zero, and the
3578 relevant otags accordingly. Hence the cache is initially filled
3579 with valid data. */
3580 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3581 tl_assert(zero_ec);
3582 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3583 tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3584 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3585 nia_to_ecu_cache[i].nia0 = zero_addr;
3586 nia_to_ecu_cache[i].ecu0 = zero_ecu;
3587 nia_to_ecu_cache[i].nia1 = zero_addr;
3588 nia_to_ecu_cache[i].ecu1 = zero_ecu;
3592 static inline UInt convert_nia_to_ecu ( Addr nia )
3594 UWord i;
3595 UInt ecu;
3596 ExeContext* ec;
3598 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3600 stats__nia_cache_queries++;
3601 i = nia % N_NIA_TO_ECU_CACHE;
3602 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3604 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3605 return nia_to_ecu_cache[i].ecu0;
3607 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3608 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3609 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3610 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3611 # undef SWAP
3612 return nia_to_ecu_cache[i].ecu0;
3615 stats__nia_cache_misses++;
3616 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3617 tl_assert(ec);
3618 ecu = VG_(get_ECU_from_ExeContext)(ec);
3619 tl_assert(VG_(is_plausible_ECU)(ecu));
3621 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3622 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3624 nia_to_ecu_cache[i].nia0 = nia;
3625 nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3626 return ecu;
3630 /* This marks the stack as addressible but undefined, after a call or
3631 return for a target that has an ABI defined stack redzone. It
3632 happens quite a lot and needs to be fast. This is the version for
3633 origin tracking. The non-origin-tracking version is below. */
3634 VG_REGPARM(3)
3635 void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
3637 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
3638 if (0)
3639 VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
3640 base, len, nia );
3642 UInt ecu = convert_nia_to_ecu ( nia );
3643 tl_assert(VG_(is_plausible_ECU)(ecu));
3645 UInt otag = ecu | MC_OKIND_STACK;
3647 # if 0
3648 /* Slow(ish) version, which is fairly easily seen to be correct.
3650 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3651 make_aligned_word64_undefined_w_otag(base + 0, otag);
3652 make_aligned_word64_undefined_w_otag(base + 8, otag);
3653 make_aligned_word64_undefined_w_otag(base + 16, otag);
3654 make_aligned_word64_undefined_w_otag(base + 24, otag);
3656 make_aligned_word64_undefined_w_otag(base + 32, otag);
3657 make_aligned_word64_undefined_w_otag(base + 40, otag);
3658 make_aligned_word64_undefined_w_otag(base + 48, otag);
3659 make_aligned_word64_undefined_w_otag(base + 56, otag);
3661 make_aligned_word64_undefined_w_otag(base + 64, otag);
3662 make_aligned_word64_undefined_w_otag(base + 72, otag);
3663 make_aligned_word64_undefined_w_otag(base + 80, otag);
3664 make_aligned_word64_undefined_w_otag(base + 88, otag);
3666 make_aligned_word64_undefined_w_otag(base + 96, otag);
3667 make_aligned_word64_undefined_w_otag(base + 104, otag);
3668 make_aligned_word64_undefined_w_otag(base + 112, otag);
3669 make_aligned_word64_undefined_w_otag(base + 120, otag);
3670 } else {
3671 MC_(make_mem_undefined_w_otag)(base, len, otag);
3673 # endif
3675 /* Idea is: go fast when
3676 * 8-aligned and length is 128
3677 * the sm is available in the main primary map
3678 * the address range falls entirely with a single secondary map
3679 If all those conditions hold, just update the V+A bits by writing
3680 directly into the vabits array. (If the sm was distinguished, this
3681 will make a copy and then write to it.)
3683 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3684 /* Now we know the address range is suitably sized and aligned. */
3685 UWord a_lo = (UWord)(base);
3686 UWord a_hi = (UWord)(base + 128 - 1);
3687 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3688 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3689 /* Now we know the entire range is within the main primary map. */
3690 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3691 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3692 if (LIKELY(pm_off_lo == pm_off_hi)) {
3693 /* Now we know that the entire address range falls within a
3694 single secondary map, and that that secondary 'lives' in
3695 the main primary map. */
3696 SecMap* sm = get_secmap_for_writing_low(a_lo);
3697 UWord v_off16 = SM_OFF_16(a_lo);
3698 UShort* p = &sm->vabits16[v_off16];
3699 p[ 0] = VA_BITS16_UNDEFINED;
3700 p[ 1] = VA_BITS16_UNDEFINED;
3701 p[ 2] = VA_BITS16_UNDEFINED;
3702 p[ 3] = VA_BITS16_UNDEFINED;
3703 p[ 4] = VA_BITS16_UNDEFINED;
3704 p[ 5] = VA_BITS16_UNDEFINED;
3705 p[ 6] = VA_BITS16_UNDEFINED;
3706 p[ 7] = VA_BITS16_UNDEFINED;
3707 p[ 8] = VA_BITS16_UNDEFINED;
3708 p[ 9] = VA_BITS16_UNDEFINED;
3709 p[10] = VA_BITS16_UNDEFINED;
3710 p[11] = VA_BITS16_UNDEFINED;
3711 p[12] = VA_BITS16_UNDEFINED;
3712 p[13] = VA_BITS16_UNDEFINED;
3713 p[14] = VA_BITS16_UNDEFINED;
3714 p[15] = VA_BITS16_UNDEFINED;
3715 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3716 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3717 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3718 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3719 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3720 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3721 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3722 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3723 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3724 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3725 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3726 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3727 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3728 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3729 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3730 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3731 return;
3736 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3737 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3738 /* Now we know the address range is suitably sized and aligned. */
3739 UWord a_lo = (UWord)(base);
3740 UWord a_hi = (UWord)(base + 288 - 1);
3741 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3742 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3743 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3744 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3745 if (LIKELY(pm_off_lo == pm_off_hi)) {
3746 /* Now we know that the entire address range falls within a
3747 single secondary map, and that that secondary 'lives' in
3748 the main primary map. */
3749 SecMap* sm = get_secmap_for_writing_low(a_lo);
3750 UWord v_off16 = SM_OFF_16(a_lo);
3751 UShort* p = &sm->vabits16[v_off16];
3752 p[ 0] = VA_BITS16_UNDEFINED;
3753 p[ 1] = VA_BITS16_UNDEFINED;
3754 p[ 2] = VA_BITS16_UNDEFINED;
3755 p[ 3] = VA_BITS16_UNDEFINED;
3756 p[ 4] = VA_BITS16_UNDEFINED;
3757 p[ 5] = VA_BITS16_UNDEFINED;
3758 p[ 6] = VA_BITS16_UNDEFINED;
3759 p[ 7] = VA_BITS16_UNDEFINED;
3760 p[ 8] = VA_BITS16_UNDEFINED;
3761 p[ 9] = VA_BITS16_UNDEFINED;
3762 p[10] = VA_BITS16_UNDEFINED;
3763 p[11] = VA_BITS16_UNDEFINED;
3764 p[12] = VA_BITS16_UNDEFINED;
3765 p[13] = VA_BITS16_UNDEFINED;
3766 p[14] = VA_BITS16_UNDEFINED;
3767 p[15] = VA_BITS16_UNDEFINED;
3768 p[16] = VA_BITS16_UNDEFINED;
3769 p[17] = VA_BITS16_UNDEFINED;
3770 p[18] = VA_BITS16_UNDEFINED;
3771 p[19] = VA_BITS16_UNDEFINED;
3772 p[20] = VA_BITS16_UNDEFINED;
3773 p[21] = VA_BITS16_UNDEFINED;
3774 p[22] = VA_BITS16_UNDEFINED;
3775 p[23] = VA_BITS16_UNDEFINED;
3776 p[24] = VA_BITS16_UNDEFINED;
3777 p[25] = VA_BITS16_UNDEFINED;
3778 p[26] = VA_BITS16_UNDEFINED;
3779 p[27] = VA_BITS16_UNDEFINED;
3780 p[28] = VA_BITS16_UNDEFINED;
3781 p[29] = VA_BITS16_UNDEFINED;
3782 p[30] = VA_BITS16_UNDEFINED;
3783 p[31] = VA_BITS16_UNDEFINED;
3784 p[32] = VA_BITS16_UNDEFINED;
3785 p[33] = VA_BITS16_UNDEFINED;
3786 p[34] = VA_BITS16_UNDEFINED;
3787 p[35] = VA_BITS16_UNDEFINED;
3788 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3789 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3790 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3791 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3792 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3793 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3794 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3795 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3796 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3797 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3798 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3799 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3800 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3801 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3802 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3803 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3804 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3805 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3806 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3807 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3808 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3809 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3810 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3811 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3812 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3813 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3814 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3815 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3816 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3817 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3818 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3819 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3820 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3821 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3822 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3823 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3824 return;
3829 /* else fall into slow case */
3830 MC_(make_mem_undefined_w_otag)(base, len, otag);
3834 /* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
3835 specialised for the non-origin-tracking case. */
3836 VG_REGPARM(2)
3837 void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
3839 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
3840 if (0)
3841 VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
3842 base, len );
3844 # if 0
3845 /* Slow(ish) version, which is fairly easily seen to be correct.
3847 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3848 make_aligned_word64_undefined(base + 0);
3849 make_aligned_word64_undefined(base + 8);
3850 make_aligned_word64_undefined(base + 16);
3851 make_aligned_word64_undefined(base + 24);
3853 make_aligned_word64_undefined(base + 32);
3854 make_aligned_word64_undefined(base + 40);
3855 make_aligned_word64_undefined(base + 48);
3856 make_aligned_word64_undefined(base + 56);
3858 make_aligned_word64_undefined(base + 64);
3859 make_aligned_word64_undefined(base + 72);
3860 make_aligned_word64_undefined(base + 80);
3861 make_aligned_word64_undefined(base + 88);
3863 make_aligned_word64_undefined(base + 96);
3864 make_aligned_word64_undefined(base + 104);
3865 make_aligned_word64_undefined(base + 112);
3866 make_aligned_word64_undefined(base + 120);
3867 } else {
3868 make_mem_undefined(base, len);
3870 # endif
3872 /* Idea is: go fast when
3873 * 8-aligned and length is 128
3874 * the sm is available in the main primary map
3875 * the address range falls entirely with a single secondary map
3876 If all those conditions hold, just update the V+A bits by writing
3877 directly into the vabits array. (If the sm was distinguished, this
3878 will make a copy and then write to it.)
3880 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3881 /* Now we know the address range is suitably sized and aligned. */
3882 UWord a_lo = (UWord)(base);
3883 UWord a_hi = (UWord)(base + 128 - 1);
3884 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3885 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3886 /* Now we know the entire range is within the main primary map. */
3887 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3888 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3889 if (LIKELY(pm_off_lo == pm_off_hi)) {
3890 /* Now we know that the entire address range falls within a
3891 single secondary map, and that that secondary 'lives' in
3892 the main primary map. */
3893 SecMap* sm = get_secmap_for_writing_low(a_lo);
3894 UWord v_off16 = SM_OFF_16(a_lo);
3895 UShort* p = &sm->vabits16[v_off16];
3896 p[ 0] = VA_BITS16_UNDEFINED;
3897 p[ 1] = VA_BITS16_UNDEFINED;
3898 p[ 2] = VA_BITS16_UNDEFINED;
3899 p[ 3] = VA_BITS16_UNDEFINED;
3900 p[ 4] = VA_BITS16_UNDEFINED;
3901 p[ 5] = VA_BITS16_UNDEFINED;
3902 p[ 6] = VA_BITS16_UNDEFINED;
3903 p[ 7] = VA_BITS16_UNDEFINED;
3904 p[ 8] = VA_BITS16_UNDEFINED;
3905 p[ 9] = VA_BITS16_UNDEFINED;
3906 p[10] = VA_BITS16_UNDEFINED;
3907 p[11] = VA_BITS16_UNDEFINED;
3908 p[12] = VA_BITS16_UNDEFINED;
3909 p[13] = VA_BITS16_UNDEFINED;
3910 p[14] = VA_BITS16_UNDEFINED;
3911 p[15] = VA_BITS16_UNDEFINED;
3912 return;
3917 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3918 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3919 /* Now we know the address range is suitably sized and aligned. */
3920 UWord a_lo = (UWord)(base);
3921 UWord a_hi = (UWord)(base + 288 - 1);
3922 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3923 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3924 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3925 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3926 if (LIKELY(pm_off_lo == pm_off_hi)) {
3927 /* Now we know that the entire address range falls within a
3928 single secondary map, and that that secondary 'lives' in
3929 the main primary map. */
3930 SecMap* sm = get_secmap_for_writing_low(a_lo);
3931 UWord v_off16 = SM_OFF_16(a_lo);
3932 UShort* p = &sm->vabits16[v_off16];
3933 p[ 0] = VA_BITS16_UNDEFINED;
3934 p[ 1] = VA_BITS16_UNDEFINED;
3935 p[ 2] = VA_BITS16_UNDEFINED;
3936 p[ 3] = VA_BITS16_UNDEFINED;
3937 p[ 4] = VA_BITS16_UNDEFINED;
3938 p[ 5] = VA_BITS16_UNDEFINED;
3939 p[ 6] = VA_BITS16_UNDEFINED;
3940 p[ 7] = VA_BITS16_UNDEFINED;
3941 p[ 8] = VA_BITS16_UNDEFINED;
3942 p[ 9] = VA_BITS16_UNDEFINED;
3943 p[10] = VA_BITS16_UNDEFINED;
3944 p[11] = VA_BITS16_UNDEFINED;
3945 p[12] = VA_BITS16_UNDEFINED;
3946 p[13] = VA_BITS16_UNDEFINED;
3947 p[14] = VA_BITS16_UNDEFINED;
3948 p[15] = VA_BITS16_UNDEFINED;
3949 p[16] = VA_BITS16_UNDEFINED;
3950 p[17] = VA_BITS16_UNDEFINED;
3951 p[18] = VA_BITS16_UNDEFINED;
3952 p[19] = VA_BITS16_UNDEFINED;
3953 p[20] = VA_BITS16_UNDEFINED;
3954 p[21] = VA_BITS16_UNDEFINED;
3955 p[22] = VA_BITS16_UNDEFINED;
3956 p[23] = VA_BITS16_UNDEFINED;
3957 p[24] = VA_BITS16_UNDEFINED;
3958 p[25] = VA_BITS16_UNDEFINED;
3959 p[26] = VA_BITS16_UNDEFINED;
3960 p[27] = VA_BITS16_UNDEFINED;
3961 p[28] = VA_BITS16_UNDEFINED;
3962 p[29] = VA_BITS16_UNDEFINED;
3963 p[30] = VA_BITS16_UNDEFINED;
3964 p[31] = VA_BITS16_UNDEFINED;
3965 p[32] = VA_BITS16_UNDEFINED;
3966 p[33] = VA_BITS16_UNDEFINED;
3967 p[34] = VA_BITS16_UNDEFINED;
3968 p[35] = VA_BITS16_UNDEFINED;
3969 return;
3974 /* else fall into slow case */
3975 make_mem_undefined(base, len);
3979 /* And this is an even more specialised case, for the case where there
3980 is no origin tracking, and the length is 128. */
3981 VG_REGPARM(1)
3982 void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
3984 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
3985 if (0)
3986 VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );
3988 # if 0
3989 /* Slow(ish) version, which is fairly easily seen to be correct.
3991 if (LIKELY( VG_IS_8_ALIGNED(base) )) {
3992 make_aligned_word64_undefined(base + 0);
3993 make_aligned_word64_undefined(base + 8);
3994 make_aligned_word64_undefined(base + 16);
3995 make_aligned_word64_undefined(base + 24);
3997 make_aligned_word64_undefined(base + 32);
3998 make_aligned_word64_undefined(base + 40);
3999 make_aligned_word64_undefined(base + 48);
4000 make_aligned_word64_undefined(base + 56);
4002 make_aligned_word64_undefined(base + 64);
4003 make_aligned_word64_undefined(base + 72);
4004 make_aligned_word64_undefined(base + 80);
4005 make_aligned_word64_undefined(base + 88);
4007 make_aligned_word64_undefined(base + 96);
4008 make_aligned_word64_undefined(base + 104);
4009 make_aligned_word64_undefined(base + 112);
4010 make_aligned_word64_undefined(base + 120);
4011 } else {
4012 make_mem_undefined(base, 128);
4014 # endif
4016 /* Idea is: go fast when
4017 * 16-aligned and length is 128
4018 * the sm is available in the main primary map
4019 * the address range falls entirely with a single secondary map
4020 If all those conditions hold, just update the V+A bits by writing
4021 directly into the vabits array. (If the sm was distinguished, this
4022 will make a copy and then write to it.)
4024 Typically this applies to amd64 'ret' instructions, since RSP is
4025 16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
4027 if (LIKELY( VG_IS_16_ALIGNED(base) )) {
4028 /* Now we know the address range is suitably sized and aligned. */
4029 UWord a_lo = (UWord)(base);
4030 UWord a_hi = (UWord)(base + 128 - 1);
4031 /* FIXME: come up with a sane story on the wraparound case
4032 (which of course cnanot happen, but still..) */
4033 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
4034 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
4035 /* Now we know the entire range is within the main primary map. */
4036 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4037 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4038 if (LIKELY(pm_off_lo == pm_off_hi)) {
4039 /* Now we know that the entire address range falls within a
4040 single secondary map, and that that secondary 'lives' in
4041 the main primary map. */
4042 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
4043 SecMap* sm = get_secmap_for_writing_low(a_lo);
4044 UWord v_off = SM_OFF(a_lo);
4045 UInt* w32 = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]);
4046 w32[ 0] = VA_BITS32_UNDEFINED;
4047 w32[ 1] = VA_BITS32_UNDEFINED;
4048 w32[ 2] = VA_BITS32_UNDEFINED;
4049 w32[ 3] = VA_BITS32_UNDEFINED;
4050 w32[ 4] = VA_BITS32_UNDEFINED;
4051 w32[ 5] = VA_BITS32_UNDEFINED;
4052 w32[ 6] = VA_BITS32_UNDEFINED;
4053 w32[ 7] = VA_BITS32_UNDEFINED;
4054 return;
4059 /* The same, but for when base is 8 % 16, which is the situation
4060 with RSP for amd64-ELF immediately after call instructions.
4062 if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
4063 /* Now we know the address range is suitably sized and aligned. */
4064 UWord a_lo = (UWord)(base);
4065 UWord a_hi = (UWord)(base + 128 - 1);
4066 /* FIXME: come up with a sane story on the wraparound case
4067 (which of course cnanot happen, but still..) */
4068 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
4069 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
4070 /* Now we know the entire range is within the main primary map. */
4071 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4072 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4073 if (LIKELY(pm_off_lo == pm_off_hi)) {
4074 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
4075 /* Now we know that the entire address range falls within a
4076 single secondary map, and that that secondary 'lives' in
4077 the main primary map. */
4078 SecMap* sm = get_secmap_for_writing_low(a_lo);
4079 UWord v_off16 = SM_OFF_16(a_lo);
4080 UShort* w16 = &sm->vabits16[v_off16];
4081 UInt* w32 = ASSUME_ALIGNED(UInt*, &w16[1]);
4082 /* The following assertion is commented out for obvious
4083 performance reasons, but was verified as valid when
4084 running the entire testsuite and also Firefox. */
4085 /* tl_assert(VG_IS_4_ALIGNED(w32)); */
4086 w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
4087 w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
4088 w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
4089 w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
4090 w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
4091 w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
4092 w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
4093 w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
4094 w16[15] = VA_BITS16_UNDEFINED; // w16[15]
4095 return;
4100 /* else fall into slow case */
4101 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
4102 make_mem_undefined(base, 128);
4106 /*------------------------------------------------------------*/
4107 /*--- Checking memory ---*/
4108 /*------------------------------------------------------------*/
4110 typedef
4111 enum {
4112 MC_Ok = 5,
4113 MC_AddrErr = 6,
4114 MC_ValueErr = 7
4116 MC_ReadResult;
4119 /* Check permissions for address range. If inadequate permissions
4120 exist, *bad_addr is set to the offending address, so the caller can
4121 know what it is. */
4123 /* Returns True if [a .. a+len) is not addressible. Otherwise,
4124 returns False, and if bad_addr is non-NULL, sets *bad_addr to
4125 indicate the lowest failing address. Functions below are
4126 similar. */
4127 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
4129 SizeT i;
4130 UWord vabits2;
4132 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
4133 for (i = 0; i < len; i++) {
4134 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
4135 vabits2 = get_vabits2(a);
4136 if (VA_BITS2_NOACCESS != vabits2) {
4137 if (bad_addr != NULL) *bad_addr = a;
4138 return False;
4140 a++;
4142 return True;
4145 static Bool is_mem_addressable ( Addr a, SizeT len,
4146 /*OUT*/Addr* bad_addr )
4148 SizeT i;
4149 UWord vabits2;
4151 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
4152 for (i = 0; i < len; i++) {
4153 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
4154 vabits2 = get_vabits2(a);
4155 if (VA_BITS2_NOACCESS == vabits2) {
4156 if (bad_addr != NULL) *bad_addr = a;
4157 return False;
4159 a++;
4161 return True;
4164 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
4165 /*OUT*/Addr* bad_addr,
4166 /*OUT*/UInt* otag )
4168 SizeT i;
4169 UWord vabits2;
4171 PROF_EVENT(MCPE_IS_MEM_DEFINED);
4172 DEBUG("is_mem_defined\n");
4174 if (otag) *otag = 0;
4175 if (bad_addr) *bad_addr = 0;
4176 for (i = 0; i < len; i++) {
4177 PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
4178 vabits2 = get_vabits2(a);
4179 if (VA_BITS2_DEFINED != vabits2) {
4180 // Error! Nb: Report addressability errors in preference to
4181 // definedness errors. And don't report definedeness errors unless
4182 // --undef-value-errors=yes.
4183 if (bad_addr) {
4184 *bad_addr = a;
4186 if (VA_BITS2_NOACCESS == vabits2) {
4187 return MC_AddrErr;
4189 if (MC_(clo_mc_level) >= 2) {
4190 if (otag && MC_(clo_mc_level) == 3) {
4191 *otag = MC_(helperc_b_load1)( a );
4193 return MC_ValueErr;
4196 a++;
4198 return MC_Ok;
4202 /* Like is_mem_defined but doesn't give up at the first uninitialised
4203 byte -- the entire range is always checked. This is important for
4204 detecting errors in the case where a checked range strays into
4205 invalid memory, but that fact is not detected by the ordinary
4206 is_mem_defined(), because of an undefined section that precedes the
4207 out of range section, possibly as a result of an alignment hole in
4208 the checked data. This version always checks the entire range and
4209 can report both a definedness and an accessbility error, if
4210 necessary. */
4211 static void is_mem_defined_comprehensive (
4212 Addr a, SizeT len,
4213 /*OUT*/Bool* errorV, /* is there a definedness err? */
4214 /*OUT*/Addr* bad_addrV, /* if so where? */
4215 /*OUT*/UInt* otagV, /* and what's its otag? */
4216 /*OUT*/Bool* errorA, /* is there an addressability err? */
4217 /*OUT*/Addr* bad_addrA /* if so where? */
4220 SizeT i;
4221 UWord vabits2;
4222 Bool already_saw_errV = False;
4224 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
4225 DEBUG("is_mem_defined_comprehensive\n");
4227 tl_assert(!(*errorV || *errorA));
4229 for (i = 0; i < len; i++) {
4230 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
4231 vabits2 = get_vabits2(a);
4232 switch (vabits2) {
4233 case VA_BITS2_DEFINED:
4234 a++;
4235 break;
4236 case VA_BITS2_UNDEFINED:
4237 case VA_BITS2_PARTDEFINED:
4238 if (!already_saw_errV) {
4239 *errorV = True;
4240 *bad_addrV = a;
4241 if (MC_(clo_mc_level) == 3) {
4242 *otagV = MC_(helperc_b_load1)( a );
4243 } else {
4244 *otagV = 0;
4246 already_saw_errV = True;
4248 a++; /* keep going */
4249 break;
4250 case VA_BITS2_NOACCESS:
4251 *errorA = True;
4252 *bad_addrA = a;
4253 return; /* give up now. */
4254 default:
4255 tl_assert(0);
4261 /* Check a zero-terminated ascii string. Tricky -- don't want to
4262 examine the actual bytes, to find the end, until we're sure it is
4263 safe to do so. */
4265 static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
4267 UWord vabits2;
4269 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
4270 DEBUG("mc_is_defined_asciiz\n");
4272 if (otag) *otag = 0;
4273 if (bad_addr) *bad_addr = 0;
4274 while (True) {
4275 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
4276 vabits2 = get_vabits2(a);
4277 if (VA_BITS2_DEFINED != vabits2) {
4278 // Error! Nb: Report addressability errors in preference to
4279 // definedness errors. And don't report definedeness errors unless
4280 // --undef-value-errors=yes.
4281 if (bad_addr) {
4282 *bad_addr = a;
4284 if (VA_BITS2_NOACCESS == vabits2) {
4285 return MC_AddrErr;
4287 if (MC_(clo_mc_level) >= 2) {
4288 if (otag && MC_(clo_mc_level) == 3) {
4289 *otag = MC_(helperc_b_load1)( a );
4291 return MC_ValueErr;
4294 /* Ok, a is safe to read. */
4295 if (* ((UChar*)a) == 0) {
4296 return MC_Ok;
4298 a++;
4303 /*------------------------------------------------------------*/
4304 /*--- Memory event handlers ---*/
4305 /*------------------------------------------------------------*/
4307 static
4308 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
4309 Addr base, SizeT size )
4311 Addr bad_addr;
4312 Bool ok = is_mem_addressable ( base, size, &bad_addr );
4314 if (!ok) {
4315 switch (part) {
4316 case Vg_CoreSysCall:
4317 MC_(record_memparam_error) ( tid, bad_addr,
4318 /*isAddrErr*/True, s, 0/*otag*/ );
4319 break;
4321 case Vg_CoreSignal:
4322 MC_(record_core_mem_error)( tid, s );
4323 break;
4325 default:
4326 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
4331 static
4332 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
4333 Addr base, SizeT size )
4335 UInt otag = 0;
4336 Addr bad_addr;
4337 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
4339 if (MC_Ok != res) {
4340 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4342 switch (part) {
4343 case Vg_CoreSysCall:
4344 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4345 isAddrErr ? 0 : otag );
4346 break;
4348 case Vg_CoreSysCallArgInMem:
4349 MC_(record_regparam_error) ( tid, s, otag );
4350 break;
4352 /* If we're being asked to jump to a silly address, record an error
4353 message before potentially crashing the entire system. */
4354 case Vg_CoreTranslate:
4355 MC_(record_jump_error)( tid, bad_addr );
4356 break;
4358 default:
4359 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
4364 static
4365 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
4366 const HChar* s, Addr str )
4368 MC_ReadResult res;
4369 Addr bad_addr = 0; // shut GCC up
4370 UInt otag = 0;
4372 tl_assert(part == Vg_CoreSysCall);
4373 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
4374 if (MC_Ok != res) {
4375 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4376 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4377 isAddrErr ? 0 : otag );
4381 /* Handling of mmap and mprotect is not as simple as it seems.
4383 The underlying semantics are that memory obtained from mmap is
4384 always initialised, but may be inaccessible. And changes to the
4385 protection of memory do not change its contents and hence not its
4386 definedness state. Problem is we can't model
4387 inaccessible-but-with-some-definedness state; once we mark memory
4388 as inaccessible we lose all info about definedness, and so can't
4389 restore that if it is later made accessible again.
4391 One obvious thing to do is this:
4393 mmap/mprotect NONE -> noaccess
4394 mmap/mprotect other -> defined
4396 The problem case here is: taking accessible memory, writing
4397 uninitialised data to it, mprotecting it NONE and later mprotecting
4398 it back to some accessible state causes the undefinedness to be
4399 lost.
4401 A better proposal is:
4403 (1) mmap NONE -> make noaccess
4404 (2) mmap other -> make defined
4406 (3) mprotect NONE -> # no change
4407 (4) mprotect other -> change any "noaccess" to "defined"
4409 (2) is OK because memory newly obtained from mmap really is defined
4410 (zeroed out by the kernel -- doing anything else would
4411 constitute a massive security hole.)
4413 (1) is OK because the only way to make the memory usable is via
4414 (4), in which case we also wind up correctly marking it all as
4415 defined.
4417 (3) is the weak case. We choose not to change memory state.
4418 (presumably the range is in some mixture of "defined" and
4419 "undefined", viz, accessible but with arbitrary V bits). Doing
4420 nothing means we retain the V bits, so that if the memory is
4421 later mprotected "other", the V bits remain unchanged, so there
4422 can be no false negatives. The bad effect is that if there's
4423 an access in the area, then MC cannot warn; but at least we'll
4424 get a SEGV to show, so it's better than nothing.
4426 Consider the sequence (3) followed by (4). Any memory that was
4427 "defined" or "undefined" previously retains its state (as
4428 required). Any memory that was "noaccess" before can only have
4429 been made that way by (1), and so it's OK to change it to
4430 "defined".
4432 See https://bugs.kde.org/show_bug.cgi?id=205541
4433 and https://bugs.kde.org/show_bug.cgi?id=210268
4435 static
4436 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4437 ULong di_handle )
4439 if (rr || ww || xx) {
4440 /* (2) mmap/mprotect other -> defined */
4441 MC_(make_mem_defined)(a, len);
4442 } else {
4443 /* (1) mmap/mprotect NONE -> noaccess */
4444 MC_(make_mem_noaccess)(a, len);
4448 static
4449 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4451 if (rr || ww || xx) {
4452 /* (4) mprotect other -> change any "noaccess" to "defined" */
4453 make_mem_defined_if_noaccess(a, len);
4454 } else {
4455 /* (3) mprotect NONE -> # no change */
4456 /* do nothing */
4461 static
4462 void mc_new_mem_startup( Addr a, SizeT len,
4463 Bool rr, Bool ww, Bool xx, ULong di_handle )
4465 // Because code is defined, initialised variables get put in the data
4466 // segment and are defined, and uninitialised variables get put in the
4467 // bss segment and are auto-zeroed (and so defined).
4469 // It's possible that there will be padding between global variables.
4470 // This will also be auto-zeroed, and marked as defined by Memcheck. If
4471 // a program uses it, Memcheck will not complain. This is arguably a
4472 // false negative, but it's a grey area -- the behaviour is defined (the
4473 // padding is zeroed) but it's probably not what the user intended. And
4474 // we can't avoid it.
4476 // Note: we generally ignore RWX permissions, because we can't track them
4477 // without requiring more than one A bit which would slow things down a
4478 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
4479 // So we mark any such pages as "unaddressable".
4480 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4481 a, (ULong)len, rr, ww, xx);
4482 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4485 static
4486 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4488 MC_(make_mem_defined)(a, len);
4492 /*------------------------------------------------------------*/
4493 /*--- Register event handlers ---*/
4494 /*------------------------------------------------------------*/
4496 /* Try and get a nonzero origin for the guest state section of thread
4497 tid characterised by (offset,size). Return 0 if nothing to show
4498 for it. */
4499 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4500 Int offset, SizeT size )
4502 Int sh2off;
4503 UInt area[3];
4504 UInt otag;
4505 sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4506 if (sh2off == -1)
4507 return 0; /* This piece of guest state is not tracked */
4508 tl_assert(sh2off >= 0);
4509 tl_assert(0 == (sh2off % 4));
4510 area[0] = 0x31313131;
4511 area[2] = 0x27272727;
4512 VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4513 tl_assert(area[0] == 0x31313131);
4514 tl_assert(area[2] == 0x27272727);
4515 otag = area[1];
4516 return otag;
4520 /* When some chunk of guest state is written, mark the corresponding
4521 shadow area as valid. This is used to initialise arbitrarily large
4522 chunks of guest state, hence the _SIZE value, which has to be as
4523 big as the biggest guest state.
4525 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4526 PtrdiffT offset, SizeT size)
4528 # define MAX_REG_WRITE_SIZE 1744
4529 UChar area[MAX_REG_WRITE_SIZE];
4530 tl_assert(size <= MAX_REG_WRITE_SIZE);
4531 VG_(memset)(area, V_BITS8_DEFINED, size);
4532 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4533 # undef MAX_REG_WRITE_SIZE
4536 static
4537 void mc_post_reg_write_clientcall ( ThreadId tid,
4538 PtrdiffT offset, SizeT size, Addr f)
4540 mc_post_reg_write(/*dummy*/0, tid, offset, size);
4543 /* Look at the definedness of the guest's shadow state for
4544 [offset, offset+len). If any part of that is undefined, record
4545 a parameter error.
4547 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4548 PtrdiffT offset, SizeT size)
4550 Int i;
4551 Bool bad;
4552 UInt otag;
4554 UChar area[16];
4555 tl_assert(size <= 16);
4557 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4559 bad = False;
4560 for (i = 0; i < size; i++) {
4561 if (area[i] != V_BITS8_DEFINED) {
4562 bad = True;
4563 break;
4567 if (!bad)
4568 return;
4570 /* We've found some undefinedness. See if we can also find an
4571 origin for it. */
4572 otag = mb_get_origin_for_guest_offset( tid, offset, size );
4573 MC_(record_regparam_error) ( tid, s, otag );
4577 /*------------------------------------------------------------*/
4578 /*--- Register-memory event handlers ---*/
4579 /*------------------------------------------------------------*/
4581 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
4582 PtrdiffT guest_state_offset, SizeT size )
4584 SizeT i;
4585 UChar vbits8;
4586 Int offset;
4587 UInt d32;
4589 /* Slow loop. */
4590 for (i = 0; i < size; i++) {
4591 get_vbits8( a+i, &vbits8 );
4592 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
4593 1, &vbits8 );
4596 if (MC_(clo_mc_level) != 3)
4597 return;
4599 /* Track origins. */
4600 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4601 if (offset == -1)
4602 return;
4604 switch (size) {
4605 case 1:
4606 d32 = MC_(helperc_b_load1)( a );
4607 break;
4608 case 2:
4609 d32 = MC_(helperc_b_load2)( a );
4610 break;
4611 case 4:
4612 d32 = MC_(helperc_b_load4)( a );
4613 break;
4614 case 8:
4615 d32 = MC_(helperc_b_load8)( a );
4616 break;
4617 case 16:
4618 d32 = MC_(helperc_b_load16)( a );
4619 break;
4620 case 32:
4621 d32 = MC_(helperc_b_load32)( a );
4622 break;
4623 default:
4624 tl_assert(0);
4627 VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
4630 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
4631 PtrdiffT guest_state_offset, Addr a,
4632 SizeT size )
4634 SizeT i;
4635 UChar vbits8;
4636 Int offset;
4637 UInt d32;
4639 /* Slow loop. */
4640 for (i = 0; i < size; i++) {
4641 VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
4642 guest_state_offset+i, 1 );
4643 set_vbits8( a+i, vbits8 );
4646 if (MC_(clo_mc_level) != 3)
4647 return;
4649 /* Track origins. */
4650 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4651 if (offset == -1)
4652 return;
4654 VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
4655 switch (size) {
4656 case 1:
4657 MC_(helperc_b_store1)( a, d32 );
4658 break;
4659 case 2:
4660 MC_(helperc_b_store2)( a, d32 );
4661 break;
4662 case 4:
4663 MC_(helperc_b_store4)( a, d32 );
4664 break;
4665 case 8:
4666 MC_(helperc_b_store8)( a, d32 );
4667 break;
4668 case 16:
4669 MC_(helperc_b_store16)( a, d32 );
4670 break;
4671 case 32:
4672 MC_(helperc_b_store32)( a, d32 );
4673 break;
4674 default:
4675 tl_assert(0);
4680 /*------------------------------------------------------------*/
4681 /*--- Some static assertions ---*/
4682 /*------------------------------------------------------------*/
4684 /* The handwritten assembly helpers below have baked-in assumptions
4685 about various constant values. These assertions attempt to make
4686 that a bit safer by checking those values and flagging changes that
4687 would make the assembly invalid. Not perfect but it's better than
4688 nothing. */
4690 STATIC_ASSERT(SM_CHUNKS * 4 == 65536);
4692 STATIC_ASSERT(VA_BITS8_DEFINED == 0xAA);
4693 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);
4695 STATIC_ASSERT(V_BITS32_DEFINED == 0x00000000);
4696 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);
4698 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA);
4699 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);
4701 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000);
4702 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);
4704 STATIC_ASSERT(VA_BITS2_DEFINED == 2);
4705 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);
4707 STATIC_ASSERT(V_BITS8_DEFINED == 0x00);
4708 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
4711 /*------------------------------------------------------------*/
4712 /*--- Functions called directly from generated code: ---*/
4713 /*--- Load/store handlers. ---*/
4714 /*------------------------------------------------------------*/
4716 /* Types: LOADV32, LOADV16, LOADV8 are:
4717 UWord fn ( Addr a )
4718 so they return 32-bits on 32-bit machines and 64-bits on
4719 64-bit machines. Addr has the same size as a host word.
4721 LOADV64 is always ULong fn ( Addr a )
4723 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4724 are a UWord, and for STOREV64 they are a ULong.
4727 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4728 naturally '_sz/8'-aligned, or it exceeds the range covered by the
4729 primary map. This is all very tricky (and important!), so let's
4730 work through the maths by hand (below), *and* assert for these
4731 values at startup. */
4732 #define MASK(_szInBytes) \
4733 ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4735 /* MASK only exists so as to define this macro. */
4736 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
4737 ((_a) & MASK((_szInBits>>3)))
4739 /* On a 32-bit machine:
4741 N_PRIMARY_BITS == 16, so
4742 N_PRIMARY_MAP == 0x10000, so
4743 N_PRIMARY_MAP-1 == 0xFFFF, so
4744 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4746 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4747 = ~ ( 0xFFFF | 0xFFFF0000 )
4748 = ~ 0xFFFF'FFFF
4751 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4752 = ~ ( 0xFFFE | 0xFFFF0000 )
4753 = ~ 0xFFFF'FFFE
4756 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4757 = ~ ( 0xFFFC | 0xFFFF0000 )
4758 = ~ 0xFFFF'FFFC
4761 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4762 = ~ ( 0xFFF8 | 0xFFFF0000 )
4763 = ~ 0xFFFF'FFF8
4766 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4767 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for
4768 the 1-byte alignment case, it is always a zero value, since MASK(1)
4769 is zero. All as expected.
4771 On a 64-bit machine, it's more complex, since we're testing
4772 simultaneously for misalignment and for the address being at or
4773 above 64G:
4775 N_PRIMARY_BITS == 20, so
4776 N_PRIMARY_MAP == 0x100000, so
4777 N_PRIMARY_MAP-1 == 0xFFFFF, so
4778 (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4780 MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4781 = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4782 = ~ 0xF'FFFF'FFFF
4783 = 0xFFFF'FFF0'0000'0000
4785 MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4786 = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4787 = ~ 0xF'FFFF'FFFE
4788 = 0xFFFF'FFF0'0000'0001
4790 MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4791 = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4792 = ~ 0xF'FFFF'FFFC
4793 = 0xFFFF'FFF0'0000'0003
4795 MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4796 = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4797 = ~ 0xF'FFFF'FFF8
4798 = 0xFFFF'FFF0'0000'0007
4801 /*------------------------------------------------------------*/
4802 /*--- LOADV256 and LOADV128 ---*/
4803 /*------------------------------------------------------------*/
4805 static INLINE
4806 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4807 Addr a, SizeT nBits, Bool isBigEndian )
4809 PROF_EVENT(MCPE_LOADV_128_OR_256);
4811 #ifndef PERF_FAST_LOADV
4812 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4813 return;
4814 #else
4816 UWord sm_off16, vabits16, j;
4817 UWord nBytes = nBits / 8;
4818 UWord nULongs = nBytes / 8;
4819 SecMap* sm;
4821 if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4822 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
4823 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4824 return;
4827 /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4828 suitably aligned, is mapped, and addressible. */
4829 for (j = 0; j < nULongs; j++) {
4830 sm = get_secmap_for_reading_low(a + 8*j);
4831 sm_off16 = SM_OFF_16(a + 8*j);
4832 vabits16 = sm->vabits16[sm_off16];
4834 // Convert V bits from compact memory form to expanded
4835 // register form.
4836 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4837 res[j] = V_BITS64_DEFINED;
4838 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4839 res[j] = V_BITS64_UNDEFINED;
4840 } else {
4841 /* Slow case: some block of 8 bytes are not all-defined or
4842 all-undefined. */
4843 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
4844 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4845 return;
4848 return;
4850 #endif
4853 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4855 mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4857 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4859 mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4862 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4864 mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4866 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4868 mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4871 /*------------------------------------------------------------*/
4872 /*--- LOADV64 ---*/
4873 /*------------------------------------------------------------*/
4875 static INLINE
4876 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4878 PROF_EVENT(MCPE_LOADV64);
4880 #ifndef PERF_FAST_LOADV
4881 return mc_LOADVn_slow( a, 64, isBigEndian );
4882 #else
4884 UWord sm_off16, vabits16;
4885 SecMap* sm;
4887 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4888 PROF_EVENT(MCPE_LOADV64_SLOW1);
4889 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4892 sm = get_secmap_for_reading_low(a);
4893 sm_off16 = SM_OFF_16(a);
4894 vabits16 = sm->vabits16[sm_off16];
4896 // Handle common case quickly: a is suitably aligned, is mapped, and
4897 // addressible.
4898 // Convert V bits from compact memory form to expanded register form.
4899 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4900 return V_BITS64_DEFINED;
4901 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4902 return V_BITS64_UNDEFINED;
4903 } else {
4904 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4905 PROF_EVENT(MCPE_LOADV64_SLOW2);
4906 return mc_LOADVn_slow( a, 64, isBigEndian );
4909 #endif
4912 // Generic for all platforms
4913 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4915 return mc_LOADV64(a, True);
4918 // Non-generic assembly for arm32-linux
4919 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4920 && defined(VGP_arm_linux)
4921 /* See mc_main_asm.c */
4923 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4924 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
4925 /* See mc_main_asm.c */
4927 #else
4928 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
4929 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4931 return mc_LOADV64(a, False);
4933 #endif
4935 /*------------------------------------------------------------*/
4936 /*--- STOREV64 ---*/
4937 /*------------------------------------------------------------*/
4939 static INLINE
4940 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4942 PROF_EVENT(MCPE_STOREV64);
4944 #ifndef PERF_FAST_STOREV
4945 // XXX: this slow case seems to be marginally faster than the fast case!
4946 // Investigate further.
4947 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4948 #else
4950 UWord sm_off16, vabits16;
4951 SecMap* sm;
4953 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4954 PROF_EVENT(MCPE_STOREV64_SLOW1);
4955 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4956 return;
4959 sm = get_secmap_for_reading_low(a);
4960 sm_off16 = SM_OFF_16(a);
4961 vabits16 = sm->vabits16[sm_off16];
4963 // To understand the below cleverness, see the extensive comments
4964 // in MC_(helperc_STOREV8).
4965 if (LIKELY(V_BITS64_DEFINED == vbits64)) {
4966 if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
4967 return;
4969 if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
4970 sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
4971 return;
4973 PROF_EVENT(MCPE_STOREV64_SLOW2);
4974 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4975 return;
4977 if (V_BITS64_UNDEFINED == vbits64) {
4978 if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
4979 return;
4981 if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
4982 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
4983 return;
4985 PROF_EVENT(MCPE_STOREV64_SLOW3);
4986 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4987 return;
4990 PROF_EVENT(MCPE_STOREV64_SLOW4);
4991 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4993 #endif
4996 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4998 mc_STOREV64(a, vbits64, True);
5000 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
5002 mc_STOREV64(a, vbits64, False);
5005 /*------------------------------------------------------------*/
5006 /*--- LOADV32 ---*/
5007 /*------------------------------------------------------------*/
5009 static INLINE
5010 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
5012 PROF_EVENT(MCPE_LOADV32);
5014 #ifndef PERF_FAST_LOADV
5015 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5016 #else
5018 UWord sm_off, vabits8;
5019 SecMap* sm;
5021 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5022 PROF_EVENT(MCPE_LOADV32_SLOW1);
5023 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5026 sm = get_secmap_for_reading_low(a);
5027 sm_off = SM_OFF(a);
5028 vabits8 = sm->vabits8[sm_off];
5030 // Handle common case quickly: a is suitably aligned, is mapped, and the
5031 // entire word32 it lives in is addressible.
5032 // Convert V bits from compact memory form to expanded register form.
5033 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
5034 // Almost certainly not necessary, but be paranoid.
5035 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5036 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
5037 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
5038 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
5039 } else {
5040 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
5041 PROF_EVENT(MCPE_LOADV32_SLOW2);
5042 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5045 #endif
5048 // Generic for all platforms
5049 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
5051 return mc_LOADV32(a, True);
5054 // Non-generic assembly for arm32-linux
5055 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5056 && defined(VGP_arm_linux)
5057 /* See mc_main_asm.c */
5059 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5060 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5061 /* See mc_main_asm.c */
5063 #else
5064 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5065 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
5067 return mc_LOADV32(a, False);
5069 #endif
5071 /*------------------------------------------------------------*/
5072 /*--- STOREV32 ---*/
5073 /*------------------------------------------------------------*/
5075 static INLINE
5076 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
5078 PROF_EVENT(MCPE_STOREV32);
5080 #ifndef PERF_FAST_STOREV
5081 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5082 #else
5084 UWord sm_off, vabits8;
5085 SecMap* sm;
5087 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5088 PROF_EVENT(MCPE_STOREV32_SLOW1);
5089 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5090 return;
5093 sm = get_secmap_for_reading_low(a);
5094 sm_off = SM_OFF(a);
5095 vabits8 = sm->vabits8[sm_off];
5097 // To understand the below cleverness, see the extensive comments
5098 // in MC_(helperc_STOREV8).
5099 if (LIKELY(V_BITS32_DEFINED == vbits32)) {
5100 if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
5101 return;
5103 if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
5104 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
5105 return;
5107 PROF_EVENT(MCPE_STOREV32_SLOW2);
5108 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5109 return;
5111 if (V_BITS32_UNDEFINED == vbits32) {
5112 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
5113 return;
5115 if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
5116 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
5117 return;
5119 PROF_EVENT(MCPE_STOREV32_SLOW3);
5120 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5121 return;
5124 PROF_EVENT(MCPE_STOREV32_SLOW4);
5125 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5127 #endif
5130 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
5132 mc_STOREV32(a, vbits32, True);
5134 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
5136 mc_STOREV32(a, vbits32, False);
5139 /*------------------------------------------------------------*/
5140 /*--- LOADV16 ---*/
5141 /*------------------------------------------------------------*/
5143 static INLINE
5144 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
5146 PROF_EVENT(MCPE_LOADV16);
5148 #ifndef PERF_FAST_LOADV
5149 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5150 #else
5152 UWord sm_off, vabits8;
5153 SecMap* sm;
5155 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5156 PROF_EVENT(MCPE_LOADV16_SLOW1);
5157 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5160 sm = get_secmap_for_reading_low(a);
5161 sm_off = SM_OFF(a);
5162 vabits8 = sm->vabits8[sm_off];
5163 // Handle common case quickly: a is suitably aligned, is mapped, and is
5164 // addressible.
5165 // Convert V bits from compact memory form to expanded register form
5166 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; }
5167 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
5168 else {
5169 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5170 // the two sub-bytes.
5171 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
5172 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
5173 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
5174 else {
5175 /* Slow case: the two bytes are not all-defined or all-undefined. */
5176 PROF_EVENT(MCPE_LOADV16_SLOW2);
5177 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5181 #endif
5184 // Generic for all platforms
5185 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
5187 return mc_LOADV16(a, True);
5190 // Non-generic assembly for arm32-linux
5191 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5192 && defined(VGP_arm_linux)
5193 __asm__( /* Derived from NCode template */
5194 ".text \n"
5195 ".align 2 \n"
5196 ".global vgMemCheck_helperc_LOADV16le \n"
5197 ".type vgMemCheck_helperc_LOADV16le, %function \n"
5198 "vgMemCheck_helperc_LOADV16le: \n" //
5199 " tst r0, #1 \n" //
5200 " bne .LLV16LEc12 \n" // if misaligned
5201 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5202 " movw r3, #:lower16:primary_map \n" //
5203 " uxth r1, r0 \n" // r1 = sec-map-offB
5204 " movt r3, #:upper16:primary_map \n" //
5205 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5206 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5207 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5208 " bne .LLV16LEc0 \n" // no, goto .LLV16LEc0
5209 ".LLV16LEh9: \n" //
5210 " mov r0, #0xFFFFFFFF \n" //
5211 " lsl r0, r0, #16 \n" // V_BITS16_DEFINED | top16safe
5212 " bx lr \n" //
5213 ".LLV16LEc0: \n" //
5214 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5215 " bne .LLV16LEc4 \n" //
5216 ".LLV16LEc2: \n" //
5217 " mov r0, #0xFFFFFFFF \n" // V_BITS16_UNDEFINED | top16safe
5218 " bx lr \n" //
5219 ".LLV16LEc4: \n" //
5220 // r1 holds sec-map-VABITS8. r0 holds the address and is 2-aligned.
5221 // Extract the relevant 4 bits and inspect.
5222 " and r2, r0, #2 \n" // addr & 2
5223 " add r2, r2, r2 \n" // 2 * (addr & 2)
5224 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 2))
5225 " and r1, r1, #15 \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
5227 " cmp r1, #0xA \n" // VA_BITS4_DEFINED
5228 " beq .LLV16LEh9 \n" //
5230 " cmp r1, #0x5 \n" // VA_BITS4_UNDEFINED
5231 " beq .LLV16LEc2 \n" //
5233 ".LLV16LEc12: \n" //
5234 " push {r4, lr} \n" //
5235 " mov r2, #0 \n" //
5236 " mov r1, #16 \n" //
5237 " bl mc_LOADVn_slow \n" //
5238 " pop {r4, pc} \n" //
5239 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5240 ".previous\n"
5243 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5244 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5245 __asm__(
5246 ".text\n"
5247 ".align 16\n"
5248 ".global vgMemCheck_helperc_LOADV16le\n"
5249 ".type vgMemCheck_helperc_LOADV16le, @function\n"
5250 "vgMemCheck_helperc_LOADV16le:\n"
5251 " test $0x1, %eax\n"
5252 " jne .LLV16LE5\n" /* jump if not aligned */
5253 " mov %eax, %edx\n"
5254 " shr $0x10, %edx\n"
5255 " mov primary_map(,%edx,4), %ecx\n"
5256 " movzwl %ax, %edx\n"
5257 " shr $0x2, %edx\n"
5258 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5259 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5260 " jne .LLV16LE2\n" /* jump if not all 32bits defined */
5261 ".LLV16LE1:\n"
5262 " mov $0xffff0000,%eax\n" /* V_BITS16_DEFINED | top16safe */
5263 " ret\n"
5264 ".LLV16LE2:\n"
5265 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5266 " jne .LLV16LE4\n" /* jump if not all 32bits undefined */
5267 ".LLV16LE3:\n"
5268 " or $0xffffffff,%eax\n" /* V_BITS16_UNDEFINED | top16safe */
5269 " ret\n"
5270 ".LLV16LE4:\n"
5271 " mov %eax, %ecx\n"
5272 " and $0x2, %ecx\n"
5273 " add %ecx, %ecx\n"
5274 " sar %cl, %edx\n"
5275 " and $0xf, %edx\n"
5276 " cmp $0xa, %edx\n"
5277 " je .LLV16LE1\n" /* jump if all 16bits are defined */
5278 " cmp $0x5, %edx\n"
5279 " je .LLV16LE3\n" /* jump if all 16bits are undefined */
5280 ".LLV16LE5:\n"
5281 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 16, 0) */
5282 " mov $16, %edx\n"
5283 " jmp mc_LOADVn_slow\n"
5284 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5285 ".previous\n"
5288 #else
5289 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5290 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
5292 return mc_LOADV16(a, False);
5294 #endif
5296 /*------------------------------------------------------------*/
5297 /*--- STOREV16 ---*/
5298 /*------------------------------------------------------------*/
5300 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
5301 static INLINE
5302 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
5304 UInt shift;
5305 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
5306 shift = (a & 2) << 1; // shift by 0 or 4
5307 vabits8 >>= shift; // shift the four bits to the bottom
5308 // check 2 x vabits2 != VA_BITS2_NOACCESS
5309 return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
5310 && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
5313 static INLINE
5314 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
5316 PROF_EVENT(MCPE_STOREV16);
5318 #ifndef PERF_FAST_STOREV
5319 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5320 #else
5322 UWord sm_off, vabits8;
5323 SecMap* sm;
5325 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5326 PROF_EVENT(MCPE_STOREV16_SLOW1);
5327 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5328 return;
5331 sm = get_secmap_for_reading_low(a);
5332 sm_off = SM_OFF(a);
5333 vabits8 = sm->vabits8[sm_off];
5335 // To understand the below cleverness, see the extensive comments
5336 // in MC_(helperc_STOREV8).
5337 if (LIKELY(V_BITS16_DEFINED == vbits16)) {
5338 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5339 return;
5341 if (!is_distinguished_sm(sm)
5342 && accessible_vabits4_in_vabits8(a, vabits8)) {
5343 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
5344 &(sm->vabits8[sm_off]) );
5345 return;
5347 PROF_EVENT(MCPE_STOREV16_SLOW2);
5348 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5350 if (V_BITS16_UNDEFINED == vbits16) {
5351 if (vabits8 == VA_BITS8_UNDEFINED) {
5352 return;
5354 if (!is_distinguished_sm(sm)
5355 && accessible_vabits4_in_vabits8(a, vabits8)) {
5356 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
5357 &(sm->vabits8[sm_off]) );
5358 return;
5360 PROF_EVENT(MCPE_STOREV16_SLOW3);
5361 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5362 return;
5365 PROF_EVENT(MCPE_STOREV16_SLOW4);
5366 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5368 #endif
5372 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
5374 mc_STOREV16(a, vbits16, True);
5376 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
5378 mc_STOREV16(a, vbits16, False);
5381 /*------------------------------------------------------------*/
5382 /*--- LOADV8 ---*/
5383 /*------------------------------------------------------------*/
5385 /* Note: endianness is irrelevant for size == 1 */
5387 // Non-generic assembly for arm32-linux
5388 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5389 && defined(VGP_arm_linux)
5390 __asm__( /* Derived from NCode template */
5391 ".text \n"
5392 ".align 2 \n"
5393 ".global vgMemCheck_helperc_LOADV8 \n"
5394 ".type vgMemCheck_helperc_LOADV8, %function \n"
5395 "vgMemCheck_helperc_LOADV8: \n" //
5396 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5397 " movw r3, #:lower16:primary_map \n" //
5398 " uxth r1, r0 \n" // r1 = sec-map-offB
5399 " movt r3, #:upper16:primary_map \n" //
5400 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5401 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5402 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5403 " bne .LLV8c0 \n" // no, goto .LLV8c0
5404 ".LLV8h9: \n" //
5405 " mov r0, #0xFFFFFF00 \n" // V_BITS8_DEFINED | top24safe
5406 " bx lr \n" //
5407 ".LLV8c0: \n" //
5408 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5409 " bne .LLV8c4 \n" //
5410 ".LLV8c2: \n" //
5411 " mov r0, #0xFFFFFFFF \n" // V_BITS8_UNDEFINED | top24safe
5412 " bx lr \n" //
5413 ".LLV8c4: \n" //
5414 // r1 holds sec-map-VABITS8
5415 // r0 holds the address. Extract the relevant 2 bits and inspect.
5416 " and r2, r0, #3 \n" // addr & 3
5417 " add r2, r2, r2 \n" // 2 * (addr & 3)
5418 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5419 " and r1, r1, #3 \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5421 " cmp r1, #2 \n" // VA_BITS2_DEFINED
5422 " beq .LLV8h9 \n" //
5424 " cmp r1, #1 \n" // VA_BITS2_UNDEFINED
5425 " beq .LLV8c2 \n" //
5427 " push {r4, lr} \n" //
5428 " mov r2, #0 \n" //
5429 " mov r1, #8 \n" //
5430 " bl mc_LOADVn_slow \n" //
5431 " pop {r4, pc} \n" //
5432 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5433 ".previous\n"
5436 /* Non-generic assembly for x86-linux */
5437 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5438 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5439 __asm__(
5440 ".text\n"
5441 ".align 16\n"
5442 ".global vgMemCheck_helperc_LOADV8\n"
5443 ".type vgMemCheck_helperc_LOADV8, @function\n"
5444 "vgMemCheck_helperc_LOADV8:\n"
5445 " mov %eax, %edx\n"
5446 " shr $0x10, %edx\n"
5447 " mov primary_map(,%edx,4), %ecx\n"
5448 " movzwl %ax, %edx\n"
5449 " shr $0x2, %edx\n"
5450 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5451 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED? */
5452 " jne .LLV8LE2\n" /* jump if not defined */
5453 ".LLV8LE1:\n"
5454 " mov $0xffffff00, %eax\n" /* V_BITS8_DEFINED | top24safe */
5455 " ret\n"
5456 ".LLV8LE2:\n"
5457 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5458 " jne .LLV8LE4\n" /* jump if not all 32bits are undefined */
5459 ".LLV8LE3:\n"
5460 " or $0xffffffff, %eax\n" /* V_BITS8_UNDEFINED | top24safe */
5461 " ret\n"
5462 ".LLV8LE4:\n"
5463 " mov %eax, %ecx\n"
5464 " and $0x3, %ecx\n"
5465 " add %ecx, %ecx\n"
5466 " sar %cl, %edx\n"
5467 " and $0x3, %edx\n"
5468 " cmp $0x2, %edx\n"
5469 " je .LLV8LE1\n" /* jump if all 8bits are defined */
5470 " cmp $0x1, %edx\n"
5471 " je .LLV8LE3\n" /* jump if all 8bits are undefined */
5472 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 8, 0) */
5473 " mov $0x8, %edx\n"
5474 " jmp mc_LOADVn_slow\n"
5475 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5476 ".previous\n"
5479 #else
5480 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5481 VG_REGPARM(1)
5482 UWord MC_(helperc_LOADV8) ( Addr a )
5484 PROF_EVENT(MCPE_LOADV8);
5486 #ifndef PERF_FAST_LOADV
5487 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5488 #else
5490 UWord sm_off, vabits8;
5491 SecMap* sm;
5493 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5494 PROF_EVENT(MCPE_LOADV8_SLOW1);
5495 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5498 sm = get_secmap_for_reading_low(a);
5499 sm_off = SM_OFF(a);
5500 vabits8 = sm->vabits8[sm_off];
5501 // Convert V bits from compact memory form to expanded register form
5502 // Handle common case quickly: a is mapped, and the entire
5503 // word32 it lives in is addressible.
5504 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; }
5505 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
5506 else {
5507 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5508 // the single byte.
5509 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
5510 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; }
5511 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
5512 else {
5513 /* Slow case: the byte is not all-defined or all-undefined. */
5514 PROF_EVENT(MCPE_LOADV8_SLOW2);
5515 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5519 #endif
5521 #endif
5523 /*------------------------------------------------------------*/
5524 /*--- STOREV8 ---*/
5525 /*------------------------------------------------------------*/
5527 VG_REGPARM(2)
5528 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
5530 PROF_EVENT(MCPE_STOREV8);
5532 #ifndef PERF_FAST_STOREV
5533 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5534 #else
5536 UWord sm_off, vabits8;
5537 SecMap* sm;
5539 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5540 PROF_EVENT(MCPE_STOREV8_SLOW1);
5541 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5542 return;
5545 sm = get_secmap_for_reading_low(a);
5546 sm_off = SM_OFF(a);
5547 vabits8 = sm->vabits8[sm_off];
5549 // Clevernesses to speed up storing V bits.
5550 // The 64/32/16 bit cases also have similar clevernesses, but it
5551 // works a little differently to the code below.
5553 // Cleverness 1: sometimes we don't have to write the shadow memory at
5554 // all, if we can tell that what we want to write is the same as what is
5555 // already there. These cases are marked below as "defined on defined" and
5556 // "undefined on undefined".
5558 // Cleverness 2:
5559 // We also avoid to call mc_STOREVn_slow if the V bits can directly
5560 // be written in the secondary map. V bits can be directly written
5561 // if 4 conditions are respected:
5562 // * The address for which V bits are written is naturally aligned
5563 // on 1 byte for STOREV8 (this is always true)
5564 // on 2 bytes for STOREV16
5565 // on 4 bytes for STOREV32
5566 // on 8 bytes for STOREV64.
5567 // * V bits being written are either fully defined or fully undefined.
5568 // (for partially defined V bits, V bits cannot be directly written,
5569 // as the secondary vbits table must be maintained).
5570 // * the secmap is not distinguished (distinguished maps cannot be
5571 // modified).
5572 // * the memory corresponding to the V bits being written is
5573 // accessible (if one or more bytes are not accessible,
5574 // we must call mc_STOREVn_slow in order to report accessibility
5575 // errors).
5576 // Note that for STOREV32 and STOREV64, it is too expensive
5577 // to verify the accessibility of each byte for the benefit it
5578 // brings. Instead, a quicker check is done by comparing to
5579 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
5580 // but misses some opportunity of direct modifications.
5581 // Checking each byte accessibility was measured for
5582 // STOREV32+perf tests and was slowing down all perf tests.
5583 // The cases corresponding to cleverness 2 are marked below as
5584 // "direct mod".
5585 if (LIKELY(V_BITS8_DEFINED == vbits8)) {
5586 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5587 return; // defined on defined
5589 if (!is_distinguished_sm(sm)
5590 && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
5591 // direct mod
5592 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
5593 &(sm->vabits8[sm_off]) );
5594 return;
5596 PROF_EVENT(MCPE_STOREV8_SLOW2);
5597 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5598 return;
5600 if (V_BITS8_UNDEFINED == vbits8) {
5601 if (vabits8 == VA_BITS8_UNDEFINED) {
5602 return; // undefined on undefined
5604 if (!is_distinguished_sm(sm)
5605 && (VA_BITS2_NOACCESS
5606 != extract_vabits2_from_vabits8(a, vabits8))) {
5607 // direct mod
5608 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
5609 &(sm->vabits8[sm_off]) );
5610 return;
5612 PROF_EVENT(MCPE_STOREV8_SLOW3);
5613 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5614 return;
5617 // Partially defined word
5618 PROF_EVENT(MCPE_STOREV8_SLOW4);
5619 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5621 #endif
5625 /*------------------------------------------------------------*/
5626 /*--- Functions called directly from generated code: ---*/
5627 /*--- Value-check failure handlers. ---*/
5628 /*------------------------------------------------------------*/
5630 /* Call these ones when an origin is available ... */
5631 VG_REGPARM(1)
5632 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
5633 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
5636 VG_REGPARM(1)
5637 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
5638 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
5641 VG_REGPARM(1)
5642 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
5643 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
5646 VG_REGPARM(1)
5647 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
5648 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
5651 VG_REGPARM(2)
5652 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
5653 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
5656 /* ... and these when an origin isn't available. */
5658 VG_REGPARM(0)
5659 void MC_(helperc_value_check0_fail_no_o) ( void ) {
5660 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
5663 VG_REGPARM(0)
5664 void MC_(helperc_value_check1_fail_no_o) ( void ) {
5665 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
5668 VG_REGPARM(0)
5669 void MC_(helperc_value_check4_fail_no_o) ( void ) {
5670 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
5673 VG_REGPARM(0)
5674 void MC_(helperc_value_check8_fail_no_o) ( void ) {
5675 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
5678 VG_REGPARM(1)
5679 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
5680 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
5684 /*------------------------------------------------------------*/
5685 /*--- Metadata get/set functions, for client requests. ---*/
5686 /*------------------------------------------------------------*/
5688 // Nb: this expands the V+A bits out into register-form V bits, even though
5689 // they're in memory. This is for backward compatibility, and because it's
5690 // probably what the user wants.
5692 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5693 error [no longer used], 3 == addressing error. */
5694 /* Nb: We used to issue various definedness/addressability errors from here,
5695 but we took them out because they ranged from not-very-helpful to
5696 downright annoying, and they complicated the error data structures. */
5697 static Int mc_get_or_set_vbits_for_client (
5698 Addr a,
5699 Addr vbits,
5700 SizeT szB,
5701 Bool setting, /* True <=> set vbits, False <=> get vbits */
5702 Bool is_client_request /* True <=> real user request
5703 False <=> internal call from gdbserver */
5706 SizeT i;
5707 Bool ok;
5708 UChar vbits8;
5710 /* Check that arrays are addressible before doing any getting/setting.
5711 vbits to be checked only for real user request. */
5712 for (i = 0; i < szB; i++) {
5713 if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
5714 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
5715 return 3;
5719 /* Do the copy */
5720 if (setting) {
5721 /* setting */
5722 for (i = 0; i < szB; i++) {
5723 ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
5724 tl_assert(ok);
5726 } else {
5727 /* getting */
5728 for (i = 0; i < szB; i++) {
5729 ok = get_vbits8(a + i, &vbits8);
5730 tl_assert(ok);
5731 ((UChar*)vbits)[i] = vbits8;
5733 if (is_client_request)
5734 // The bytes in vbits[] have now been set, so mark them as such.
5735 MC_(make_mem_defined)(vbits, szB);
5738 return 1;
5742 /*------------------------------------------------------------*/
5743 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
5744 /*------------------------------------------------------------*/
5746 /* For the memory leak detector, say whether an entire 64k chunk of
5747 address space is possibly in use, or not. If in doubt return
5748 True.
5750 Bool MC_(is_within_valid_secondary) ( Addr a )
5752 SecMap* sm = maybe_get_secmap_for ( a );
5753 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
5754 /* Definitely not in use. */
5755 return False;
5756 } else {
5757 return True;
5762 /* For the memory leak detector, say whether or not a given word
5763 address is to be regarded as valid. */
5764 Bool MC_(is_valid_aligned_word) ( Addr a )
5766 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5767 tl_assert(VG_IS_WORD_ALIGNED(a));
5768 if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5769 return False;
5770 if (sizeof(UWord) == 8) {
5771 if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5772 return False;
5774 if (UNLIKELY(MC_(in_ignored_range)(a)))
5775 return False;
5776 else
5777 return True;
5781 /*------------------------------------------------------------*/
5782 /*--- Initialisation ---*/
5783 /*------------------------------------------------------------*/
5785 static void init_shadow_memory ( void )
5787 Int i;
5788 SecMap* sm;
5790 tl_assert(V_BIT_UNDEFINED == 1);
5791 tl_assert(V_BIT_DEFINED == 0);
5792 tl_assert(V_BITS8_UNDEFINED == 0xFF);
5793 tl_assert(V_BITS8_DEFINED == 0);
5795 /* Build the 3 distinguished secondaries */
5796 sm = &sm_distinguished[SM_DIST_NOACCESS];
5797 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5799 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5800 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5802 sm = &sm_distinguished[SM_DIST_DEFINED];
5803 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5805 /* Set up the primary map. */
5806 /* These entries gradually get overwritten as the used address
5807 space expands. */
5808 for (i = 0; i < N_PRIMARY_MAP; i++)
5809 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5811 /* Auxiliary primary maps */
5812 init_auxmap_L1_L2();
5814 /* auxmap_size = auxmap_used = 0;
5815 no ... these are statically initialised */
5817 /* Secondary V bit table */
5818 secVBitTable = createSecVBitTable();
5822 /*------------------------------------------------------------*/
5823 /*--- Sanity check machinery (permanently engaged) ---*/
5824 /*------------------------------------------------------------*/
5826 static Bool mc_cheap_sanity_check ( void )
5828 n_sanity_cheap++;
5829 PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
5830 /* Check for sane operating level */
5831 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5832 return False;
5833 /* nothing else useful we can rapidly check */
5834 return True;
5837 static Bool mc_expensive_sanity_check ( void )
5839 Int i;
5840 Word n_secmaps_found;
5841 SecMap* sm;
5842 const HChar* errmsg;
5843 Bool bad = False;
5845 if (0) VG_(printf)("expensive sanity check\n");
5846 if (0) return True;
5848 n_sanity_expensive++;
5849 PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);
5851 /* Check for sane operating level */
5852 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5853 return False;
5855 /* Check that the 3 distinguished SMs are still as they should be. */
5857 /* Check noaccess DSM. */
5858 sm = &sm_distinguished[SM_DIST_NOACCESS];
5859 for (i = 0; i < SM_CHUNKS; i++)
5860 if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5861 bad = True;
5863 /* Check undefined DSM. */
5864 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5865 for (i = 0; i < SM_CHUNKS; i++)
5866 if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5867 bad = True;
5869 /* Check defined DSM. */
5870 sm = &sm_distinguished[SM_DIST_DEFINED];
5871 for (i = 0; i < SM_CHUNKS; i++)
5872 if (sm->vabits8[i] != VA_BITS8_DEFINED)
5873 bad = True;
5875 if (bad) {
5876 VG_(printf)("memcheck expensive sanity: "
5877 "distinguished_secondaries have changed\n");
5878 return False;
5881 /* If we're not checking for undefined value errors, the secondary V bit
5882 * table should be empty. */
5883 if (MC_(clo_mc_level) == 1) {
5884 if (0 != VG_(OSetGen_Size)(secVBitTable))
5885 return False;
5888 /* check the auxiliary maps, very thoroughly */
5889 n_secmaps_found = 0;
5890 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
5891 if (errmsg) {
5892 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
5893 return False;
5896 /* n_secmaps_found is now the number referred to by the auxiliary
5897 primary map. Now add on the ones referred to by the main
5898 primary map. */
5899 for (i = 0; i < N_PRIMARY_MAP; i++) {
5900 if (primary_map[i] == NULL) {
5901 bad = True;
5902 } else {
5903 if (!is_distinguished_sm(primary_map[i]))
5904 n_secmaps_found++;
5908 /* check that the number of secmaps issued matches the number that
5909 are reachable (iow, no secmap leaks) */
5910 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
5911 bad = True;
5913 if (bad) {
5914 VG_(printf)("memcheck expensive sanity: "
5915 "apparent secmap leakage\n");
5916 return False;
5919 if (bad) {
5920 VG_(printf)("memcheck expensive sanity: "
5921 "auxmap covers wrong address space\n");
5922 return False;
5925 /* there is only one pointer to each secmap (expensive) */
5927 return True;
5930 /*------------------------------------------------------------*/
5931 /*--- Command line args ---*/
5932 /*------------------------------------------------------------*/
5934 /* 31 Aug 2015: Vectorised code is now so widespread that
5935 --partial-loads-ok needs to be enabled by default on all platforms.
5936 Not doing so causes lots of false errors. */
5937 Bool MC_(clo_partial_loads_ok) = True;
5938 Long MC_(clo_freelist_vol) = 20*1000*1000LL;
5939 Long MC_(clo_freelist_big_blocks) = 1*1000*1000LL;
5940 LeakCheckMode MC_(clo_leak_check) = LC_Summary;
5941 VgRes MC_(clo_leak_resolution) = Vg_HighRes;
5942 UInt MC_(clo_show_leak_kinds) = R2S(Possible) | R2S(Unreached);
5943 UInt MC_(clo_error_for_leak_kinds) = R2S(Possible) | R2S(Unreached);
5944 UInt MC_(clo_leak_check_heuristics) = H2S(LchStdString)
5945 | H2S( LchLength64)
5946 | H2S( LchNewArray)
5947 | H2S( LchMultipleInheritance);
5948 Bool MC_(clo_xtree_leak) = False;
5949 const HChar* MC_(clo_xtree_leak_file) = "xtleak.kcg.%p";
5950 Bool MC_(clo_workaround_gcc296_bugs) = False;
5951 Int MC_(clo_malloc_fill) = -1;
5952 Int MC_(clo_free_fill) = -1;
5953 KeepStacktraces MC_(clo_keep_stacktraces) = KS_alloc_and_free;
5954 Int MC_(clo_mc_level) = 2;
5955 Bool MC_(clo_show_mismatched_frees) = True;
5957 ExpensiveDefinednessChecks
5958 MC_(clo_expensive_definedness_checks) = EdcAUTO;
5960 Bool MC_(clo_ignore_range_below_sp) = False;
5961 UInt MC_(clo_ignore_range_below_sp__first_offset) = 0;
5962 UInt MC_(clo_ignore_range_below_sp__last_offset) = 0;
5964 static const HChar * MC_(parse_leak_heuristics_tokens) =
5965 "-,stdstring,length64,newarray,multipleinheritance";
5966 /* The first heuristic value (LchNone) has no keyword, as this is
5967 a fake heuristic used to collect the blocks found without any
5968 heuristic. */
5970 static Bool mc_process_cmd_line_options(const HChar* arg)
5972 const HChar* tmp_str;
5973 Int tmp_show;
5975 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
5977 /* Set MC_(clo_mc_level):
5978 1 = A bit tracking only
5979 2 = A and V bit tracking, but no V bit origins
5980 3 = A and V bit tracking, and V bit origins
5982 Do this by inspecting --undef-value-errors= and
5983 --track-origins=. Reject the case --undef-value-errors=no
5984 --track-origins=yes as meaningless.
5986 if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
5987 if (MC_(clo_mc_level) == 3) {
5988 goto bad_level;
5989 } else {
5990 MC_(clo_mc_level) = 1;
5991 return True;
5994 if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
5995 if (MC_(clo_mc_level) == 1)
5996 MC_(clo_mc_level) = 2;
5997 return True;
5999 if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
6000 if (MC_(clo_mc_level) == 3)
6001 MC_(clo_mc_level) = 2;
6002 return True;
6004 if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
6005 if (MC_(clo_mc_level) == 1) {
6006 goto bad_level;
6007 } else {
6008 MC_(clo_mc_level) = 3;
6009 return True;
6013 if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
6014 else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
6015 MC_(parse_leak_kinds_tokens),
6016 MC_(clo_error_for_leak_kinds)) {}
6017 else if VG_USET_CLO(arg, "--show-leak-kinds",
6018 MC_(parse_leak_kinds_tokens),
6019 MC_(clo_show_leak_kinds)) {}
6020 else if VG_USET_CLO(arg, "--leak-check-heuristics",
6021 MC_(parse_leak_heuristics_tokens),
6022 MC_(clo_leak_check_heuristics)) {}
6023 else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
6024 if (tmp_show) {
6025 MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
6026 } else {
6027 MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
6030 else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
6031 if (tmp_show) {
6032 MC_(clo_show_leak_kinds) |= R2S(Possible);
6033 } else {
6034 MC_(clo_show_leak_kinds) &= ~R2S(Possible);
6037 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
6038 MC_(clo_workaround_gcc296_bugs)) {}
6040 else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol),
6041 0, 10*1000*1000*1000LL) {}
6043 else if VG_BINT_CLO(arg, "--freelist-big-blocks",
6044 MC_(clo_freelist_big_blocks),
6045 0, 10*1000*1000*1000LL) {}
6047 else if VG_XACT_CLO(arg, "--leak-check=no",
6048 MC_(clo_leak_check), LC_Off) {}
6049 else if VG_XACT_CLO(arg, "--leak-check=summary",
6050 MC_(clo_leak_check), LC_Summary) {}
6051 else if VG_XACT_CLO(arg, "--leak-check=yes",
6052 MC_(clo_leak_check), LC_Full) {}
6053 else if VG_XACT_CLO(arg, "--leak-check=full",
6054 MC_(clo_leak_check), LC_Full) {}
6056 else if VG_XACT_CLO(arg, "--leak-resolution=low",
6057 MC_(clo_leak_resolution), Vg_LowRes) {}
6058 else if VG_XACT_CLO(arg, "--leak-resolution=med",
6059 MC_(clo_leak_resolution), Vg_MedRes) {}
6060 else if VG_XACT_CLO(arg, "--leak-resolution=high",
6061 MC_(clo_leak_resolution), Vg_HighRes) {}
6063 else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
6064 Bool ok = parse_ignore_ranges(tmp_str);
6065 if (!ok) {
6066 VG_(message)(Vg_DebugMsg,
6067 "ERROR: --ignore-ranges: "
6068 "invalid syntax, or end <= start in range\n");
6069 return False;
6071 if (gIgnoredAddressRanges) {
6072 UInt i;
6073 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
6074 UWord val = IAR_INVALID;
6075 UWord key_min = ~(UWord)0;
6076 UWord key_max = (UWord)0;
6077 VG_(indexRangeMap)( &key_min, &key_max, &val,
6078 gIgnoredAddressRanges, i );
6079 tl_assert(key_min <= key_max);
6080 UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
6081 if (key_max - key_min > limit && val == IAR_CommandLine) {
6082 VG_(message)(Vg_DebugMsg,
6083 "ERROR: --ignore-ranges: suspiciously large range:\n");
6084 VG_(message)(Vg_DebugMsg,
6085 " 0x%lx-0x%lx (size %lu)\n", key_min, key_max,
6086 key_max - key_min + 1);
6087 return False;
6093 else if VG_STR_CLO(arg, "--ignore-range-below-sp", tmp_str) {
6094 /* This seems at first a bit weird, but: in order to imply
6095 a non-wrapped-around address range, the first offset needs to be
6096 larger than the second one. For example
6097 --ignore-range-below-sp=8192,8189
6098 would cause accesses to in the range [SP-8192, SP-8189] to be
6099 ignored. */
6100 UInt offs1 = 0, offs2 = 0;
6101 Bool ok = parse_UInt_pair(&tmp_str, &offs1, &offs2);
6102 // Ensure we used all the text after the '=' sign.
6103 if (ok && *tmp_str != 0) ok = False;
6104 if (!ok) {
6105 VG_(message)(Vg_DebugMsg,
6106 "ERROR: --ignore-range-below-sp: invalid syntax. "
6107 " Expected \"...=decimalnumber-decimalnumber\".\n");
6108 return False;
6110 if (offs1 > 1000*1000 /*arbitrary*/ || offs2 > 1000*1000 /*ditto*/) {
6111 VG_(message)(Vg_DebugMsg,
6112 "ERROR: --ignore-range-below-sp: suspiciously large "
6113 "offset(s): %u and %u\n", offs1, offs2);
6114 return False;
6116 if (offs1 <= offs2) {
6117 VG_(message)(Vg_DebugMsg,
6118 "ERROR: --ignore-range-below-sp: invalid offsets "
6119 "(the first must be larger): %u and %u\n", offs1, offs2);
6120 return False;
6122 tl_assert(offs1 > offs2);
6123 if (offs1 - offs2 > 4096 /*arbitrary*/) {
6124 VG_(message)(Vg_DebugMsg,
6125 "ERROR: --ignore-range-below-sp: suspiciously large "
6126 "range: %u-%u (size %u)\n", offs1, offs2, offs1 - offs2);
6127 return False;
6129 MC_(clo_ignore_range_below_sp) = True;
6130 MC_(clo_ignore_range_below_sp__first_offset) = offs1;
6131 MC_(clo_ignore_range_below_sp__last_offset) = offs2;
6132 return True;
6135 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
6136 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}
6138 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
6139 MC_(clo_keep_stacktraces), KS_alloc) {}
6140 else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
6141 MC_(clo_keep_stacktraces), KS_free) {}
6142 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
6143 MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
6144 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
6145 MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
6146 else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
6147 MC_(clo_keep_stacktraces), KS_none) {}
6149 else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
6150 MC_(clo_show_mismatched_frees)) {}
6152 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=no",
6153 MC_(clo_expensive_definedness_checks), EdcNO) {}
6154 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=auto",
6155 MC_(clo_expensive_definedness_checks), EdcAUTO) {}
6156 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=yes",
6157 MC_(clo_expensive_definedness_checks), EdcYES) {}
6159 else if VG_BOOL_CLO(arg, "--xtree-leak",
6160 MC_(clo_xtree_leak)) {}
6161 else if VG_STR_CLO (arg, "--xtree-leak-file",
6162 MC_(clo_xtree_leak_file)) {}
6164 else
6165 return VG_(replacement_malloc_process_cmd_line_option)(arg);
6167 return True;
6170 bad_level:
6171 VG_(fmsg_bad_option)(arg,
6172 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
6175 static void mc_print_usage(void)
6177 VG_(printf)(
6178 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
6179 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
6180 " --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
6181 " [definite,possible]\n"
6182 " --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
6183 " [definite,possible]\n"
6184 " where kind is one of:\n"
6185 " definite indirect possible reachable all none\n"
6186 " --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
6187 " improving leak search false positive [all]\n"
6188 " where heur is one of:\n"
6189 " stdstring length64 newarray multipleinheritance all none\n"
6190 " --show-reachable=yes same as --show-leak-kinds=all\n"
6191 " --show-reachable=no --show-possibly-lost=yes\n"
6192 " same as --show-leak-kinds=definite,possible\n"
6193 " --show-reachable=no --show-possibly-lost=no\n"
6194 " same as --show-leak-kinds=definite\n"
6195 " --xtree-leak=no|yes output leak result in xtree format? [no]\n"
6196 " --xtree-leak-file=<file> xtree leak report file [xtleak.kcg.%%p]\n"
6197 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
6198 " --track-origins=no|yes show origins of undefined values? [no]\n"
6199 " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
6200 " --expensive-definedness-checks=no|auto|yes\n"
6201 " Use extra-precise definedness tracking [auto]\n"
6202 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
6203 " --freelist-big-blocks=<number> releases first blocks with size>= [1000000]\n"
6204 " --workaround-gcc296-bugs=no|yes self explanatory [no]. Deprecated.\n"
6205 " Use --ignore-range-below-sp instead.\n"
6206 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
6207 " --ignore-range-below-sp=<number>-<number> do not report errors for\n"
6208 " accesses at the given offsets below SP\n"
6209 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
6210 " --free-fill=<hexnumber> fill free'd areas with given value\n"
6211 " --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
6212 " stack trace(s) to keep for malloc'd/free'd areas [alloc-and-free]\n"
6213 " --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
6217 static void mc_print_debug_usage(void)
6219 VG_(printf)(
6220 " (none)\n"
6225 /*------------------------------------------------------------*/
6226 /*--- Client blocks ---*/
6227 /*------------------------------------------------------------*/
6229 /* Client block management:
6231 This is managed as an expanding array of client block descriptors.
6232 Indices of live descriptors are issued to the client, so it can ask
6233 to free them later. Therefore we cannot slide live entries down
6234 over dead ones. Instead we must use free/inuse flags and scan for
6235 an empty slot at allocation time. This in turn means allocation is
6236 relatively expensive, so we hope this does not happen too often.
6238 An unused block has start == size == 0
6241 /* type CGenBlock is defined in mc_include.h */
6243 /* This subsystem is self-initialising. */
6244 static UWord cgb_size = 0;
6245 static UWord cgb_used = 0;
6246 static CGenBlock* cgbs = NULL;
6248 /* Stats for this subsystem. */
6249 static ULong cgb_used_MAX = 0; /* Max in use. */
6250 static ULong cgb_allocs = 0; /* Number of allocs. */
6251 static ULong cgb_discards = 0; /* Number of discards. */
6252 static ULong cgb_search = 0; /* Number of searches. */
6255 /* Get access to the client block array. */
6256 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
6257 /*OUT*/UWord* nBlocks )
6259 *blocks = cgbs;
6260 *nBlocks = cgb_used;
6264 static
6265 Int alloc_client_block ( void )
6267 UWord i, sz_new;
6268 CGenBlock* cgbs_new;
6270 cgb_allocs++;
6272 for (i = 0; i < cgb_used; i++) {
6273 cgb_search++;
6274 if (cgbs[i].start == 0 && cgbs[i].size == 0)
6275 return i;
6278 /* Not found. Try to allocate one at the end. */
6279 if (cgb_used < cgb_size) {
6280 cgb_used++;
6281 return cgb_used-1;
6284 /* Ok, we have to allocate a new one. */
6285 tl_assert(cgb_used == cgb_size);
6286 sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
6288 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
6289 for (i = 0; i < cgb_used; i++)
6290 cgbs_new[i] = cgbs[i];
6292 if (cgbs != NULL)
6293 VG_(free)( cgbs );
6294 cgbs = cgbs_new;
6296 cgb_size = sz_new;
6297 cgb_used++;
6298 if (cgb_used > cgb_used_MAX)
6299 cgb_used_MAX = cgb_used;
6300 return cgb_used-1;
6304 static void show_client_block_stats ( void )
6306 VG_(message)(Vg_DebugMsg,
6307 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6308 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
6311 static void print_monitor_help ( void )
6313 VG_(gdb_printf)
6315 "\n"
6316 "memcheck monitor commands:\n"
6317 " xb <addr> [<len>]\n"
6318 " prints validity bits for <len> (or 1) bytes at <addr>\n"
6319 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6320 " Then prints the bytes values below the corresponding validity bits\n"
6321 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6322 " Example: xb 0x8049c78 10\n"
6323 " get_vbits <addr> [<len>]\n"
6324 " Similar to xb, but only prints the validity bytes by group of 4.\n"
6325 " make_memory [noaccess|undefined\n"
6326 " |defined|Definedifaddressable] <addr> [<len>]\n"
6327 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6328 " check_memory [addressable|defined] <addr> [<len>]\n"
6329 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6330 " and outputs a description of <addr>\n"
6331 " leak_check [full*|summary|xtleak]\n"
6332 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6333 " [heuristics heur1,heur2,...]\n"
6334 " [increased*|changed|any]\n"
6335 " [unlimited*|limited <max_loss_records_output>]\n"
6336 " * = defaults\n"
6337 " xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
6338 " where kind is one of:\n"
6339 " definite indirect possible reachable all none\n"
6340 " where heur is one of:\n"
6341 " stdstring length64 newarray multipleinheritance all none*\n"
6342 " Examples: leak_check\n"
6343 " leak_check summary any\n"
6344 " leak_check full kinds indirect,possible\n"
6345 " leak_check full reachable any limited 100\n"
6346 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6347 " [unlimited*|limited <max_blocks>]\n"
6348 " [heuristics heur1,heur2,...]\n"
6349 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6350 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6351 " With heuristics, only shows the blocks found via heur1,heur2,...\n"
6352 " * = defaults\n"
6353 " who_points_at <addr> [<len>]\n"
6354 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6355 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6356 " with len > 1, will also show \"interior pointers\")\n"
6357 " xtmemory [<filename>]\n"
6358 " dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
6359 "\n");
6362 /* Print szB bytes at address, with a format similar to the gdb command
6363 x /<szB>xb address.
6364 res[i] == 1 indicates the corresponding byte is addressable. */
6365 static void gdb_xb (Addr address, SizeT szB, Int res[])
6367 UInt i;
6369 for (i = 0; i < szB; i++) {
6370 UInt bnr = i % 8;
6371 if (bnr == 0) {
6372 if (i != 0)
6373 VG_(printf) ("\n"); // Terminate previous line
6374 VG_(printf) ("%p:", (void*)(address+i));
6376 if (res[i] == 1)
6377 VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
6378 else
6379 VG_(printf) ("\t0x??");
6381 VG_(printf) ("\n"); // Terminate previous line
6385 /* Returns the address of the next non space character,
6386 or address of the string terminator. */
6387 static HChar* next_non_space (HChar *s)
6389 while (*s && *s == ' ')
6390 s++;
6391 return s;
6394 /* Parse an integer slice, i.e. a single integer or a range of integer.
6395 Syntax is:
6396 <integer>[..<integer> ]
6397 (spaces are allowed before and/or after ..).
6398 Return True if range correctly parsed, False otherwise. */
6399 static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
6400 UInt *from, UInt *to)
6402 HChar* wl;
6403 HChar *endptr;
6404 endptr = NULL;////
6405 wl = VG_(strtok_r) (s, " ", saveptr);
6407 /* slice must start with an integer. */
6408 if (wl == NULL) {
6409 VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
6410 return False;
6412 *from = VG_(strtoull10) (wl, &endptr);
6413 if (endptr == wl) {
6414 VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
6415 return False;
6418 if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
6419 /* wl token is an integer terminating the string
6420 or else next token does not start with .
6421 In both cases, the slice is a single integer. */
6422 *to = *from;
6423 return True;
6426 if (*endptr == '\0') {
6427 // iii .. => get the next token
6428 wl = VG_(strtok_r) (NULL, " .", saveptr);
6429 } else {
6430 // It must be iii..
6431 if (*endptr != '.' && *(endptr+1) != '.') {
6432 VG_(gdb_printf) ("expecting slice <from>..<to>\n");
6433 return False;
6435 if ( *(endptr+2) == ' ') {
6436 // It must be iii.. jjj => get the next token
6437 wl = VG_(strtok_r) (NULL, " .", saveptr);
6438 } else {
6439 // It must be iii..jjj
6440 wl = endptr+2;
6444 *to = VG_(strtoull10) (wl, &endptr);
6445 if (*endptr != '\0') {
6446 VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
6447 return False;
6450 if (*from > *to) {
6451 VG_(gdb_printf) ("<from> cannot be bigger than <to> "
6452 "in slice <from>..<to>\n");
6453 return False;
6456 return True;
6459 /* return True if request recognised, False otherwise */
6460 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
6462 HChar* wcmd;
6463 HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
6464 HChar *ssaveptr;
6466 VG_(strcpy) (s, req);
6468 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
6469 /* NB: if possible, avoid introducing a new command below which
6470 starts with the same first letter(s) as an already existing
6471 command. This ensures a shorter abbreviation for the user. */
6472 switch (VG_(keyword_id)
6473 ("help get_vbits leak_check make_memory check_memory "
6474 "block_list who_points_at xb xtmemory",
6475 wcmd, kwd_report_duplicated_matches)) {
6476 case -2: /* multiple matches */
6477 return True;
6478 case -1: /* not found */
6479 return False;
6480 case 0: /* help */
6481 print_monitor_help();
6482 return True;
6483 case 1: { /* get_vbits */
6484 Addr address;
6485 SizeT szB = 1;
6486 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6487 UChar vbits;
6488 Int i;
6489 Int unaddressable = 0;
6490 for (i = 0; i < szB; i++) {
6491 Int res = mc_get_or_set_vbits_for_client
6492 (address+i, (Addr) &vbits, 1,
6493 False, /* get them */
6494 False /* is client request */ );
6495 /* we are before the first character on next line, print a \n. */
6496 if ((i % 32) == 0 && i != 0)
6497 VG_(printf) ("\n");
6498 /* we are before the next block of 4 starts, print a space. */
6499 else if ((i % 4) == 0 && i != 0)
6500 VG_(printf) (" ");
6501 if (res == 1) {
6502 VG_(printf) ("%02x", vbits);
6503 } else {
6504 tl_assert(3 == res);
6505 unaddressable++;
6506 VG_(printf) ("__");
6509 VG_(printf) ("\n");
6510 if (unaddressable) {
6511 VG_(printf)
6512 ("Address %p len %lu has %d bytes unaddressable\n",
6513 (void *)address, szB, unaddressable);
6516 return True;
6518 case 2: { /* leak_check */
6519 Int err = 0;
6520 LeakCheckParams lcp;
6521 HChar* xt_filename = NULL;
6522 HChar* kw;
6524 lcp.mode = LC_Full;
6525 lcp.show_leak_kinds = R2S(Possible) | R2S(Unreached);
6526 lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
6527 lcp.heuristics = 0;
6528 lcp.deltamode = LCD_Increased;
6529 lcp.max_loss_records_output = 999999999;
6530 lcp.requested_by_monitor_command = True;
6531 lcp.xt_filename = NULL;
6533 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
6534 kw != NULL;
6535 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6536 switch (VG_(keyword_id)
6537 ("full summary xtleak "
6538 "kinds reachable possibleleak definiteleak "
6539 "heuristics "
6540 "increased changed any "
6541 "unlimited limited ",
6542 kw, kwd_report_all)) {
6543 case -2: err++; break;
6544 case -1: err++; break;
6545 case 0: /* full */
6546 lcp.mode = LC_Full; break;
6547 case 1: /* summary */
6548 lcp.mode = LC_Summary; break;
6549 case 2: /* xtleak */
6550 lcp.mode = LC_Full;
6551 xt_filename
6552 = VG_(expand_file_name)("--xtleak-mc_main.c",
6553 "xtleak.kcg.%p.%n");
6554 lcp.xt_filename = xt_filename;
6555 break;
6556 case 3: { /* kinds */
6557 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6558 if (wcmd == NULL
6559 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
6560 True/*allow_all*/,
6561 wcmd,
6562 &lcp.show_leak_kinds)) {
6563 VG_(gdb_printf) ("missing or malformed leak kinds set\n");
6564 err++;
6566 break;
6568 case 4: /* reachable */
6569 lcp.show_leak_kinds = MC_(all_Reachedness)();
6570 break;
6571 case 5: /* possibleleak */
6572 lcp.show_leak_kinds
6573 = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
6574 break;
6575 case 6: /* definiteleak */
6576 lcp.show_leak_kinds = R2S(Unreached);
6577 break;
6578 case 7: { /* heuristics */
6579 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6580 if (wcmd == NULL
6581 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6582 True,/*allow_all*/
6583 wcmd,
6584 &lcp.heuristics)) {
6585 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6586 err++;
6588 break;
6590 case 8: /* increased */
6591 lcp.deltamode = LCD_Increased; break;
6592 case 9: /* changed */
6593 lcp.deltamode = LCD_Changed; break;
6594 case 10: /* any */
6595 lcp.deltamode = LCD_Any; break;
6596 case 11: /* unlimited */
6597 lcp.max_loss_records_output = 999999999; break;
6598 case 12: { /* limited */
6599 Int int_value;
6600 const HChar* endptr;
6602 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6603 if (wcmd == NULL) {
6604 int_value = 0;
6605 endptr = "empty"; /* to report an error below */
6606 } else {
6607 HChar *the_end;
6608 int_value = VG_(strtoll10) (wcmd, &the_end);
6609 endptr = the_end;
6611 if (*endptr != '\0')
6612 VG_(gdb_printf) ("missing or malformed integer value\n");
6613 else if (int_value > 0)
6614 lcp.max_loss_records_output = (UInt) int_value;
6615 else
6616 VG_(gdb_printf) ("max_loss_records_output must be >= 1,"
6617 " got %d\n", int_value);
6618 break;
6620 default:
6621 tl_assert (0);
6624 if (!err)
6625 MC_(detect_memory_leaks)(tid, &lcp);
6626 if (xt_filename != NULL)
6627 VG_(free)(xt_filename);
6628 return True;
6631 case 3: { /* make_memory */
6632 Addr address;
6633 SizeT szB = 1;
6634 Int kwdid = VG_(keyword_id)
6635 ("noaccess undefined defined Definedifaddressable",
6636 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6637 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6638 return True;
6639 switch (kwdid) {
6640 case -2: break;
6641 case -1: break;
6642 case 0: MC_(make_mem_noaccess) (address, szB); break;
6643 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
6644 MC_OKIND_USER ); break;
6645 case 2: MC_(make_mem_defined) ( address, szB ); break;
6646 case 3: make_mem_defined_if_addressable ( address, szB ); break;;
6647 default: tl_assert(0);
6649 return True;
6652 case 4: { /* check_memory */
6653 Addr address;
6654 SizeT szB = 1;
6655 Addr bad_addr;
6656 UInt okind;
6657 const HChar* src;
6658 UInt otag;
6659 UInt ecu;
6660 ExeContext* origin_ec;
6661 MC_ReadResult res;
6663 Int kwdid = VG_(keyword_id)
6664 ("addressable defined",
6665 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6666 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6667 return True;
6668 switch (kwdid) {
6669 case -2: break;
6670 case -1: break;
6671 case 0: /* addressable */
6672 if (is_mem_addressable ( address, szB, &bad_addr ))
6673 VG_(printf) ("Address %p len %lu addressable\n",
6674 (void *)address, szB);
6675 else
6676 VG_(printf)
6677 ("Address %p len %lu not addressable:\nbad address %p\n",
6678 (void *)address, szB, (void *) bad_addr);
6679 // Describe this (probably live) address with current epoch
6680 MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
6681 break;
6682 case 1: /* defined */
6683 res = is_mem_defined ( address, szB, &bad_addr, &otag );
6684 if (MC_AddrErr == res)
6685 VG_(printf)
6686 ("Address %p len %lu not addressable:\nbad address %p\n",
6687 (void *)address, szB, (void *) bad_addr);
6688 else if (MC_ValueErr == res) {
6689 okind = otag & 3;
6690 switch (okind) {
6691 case MC_OKIND_STACK:
6692 src = " was created by a stack allocation"; break;
6693 case MC_OKIND_HEAP:
6694 src = " was created by a heap allocation"; break;
6695 case MC_OKIND_USER:
6696 src = " was created by a client request"; break;
6697 case MC_OKIND_UNKNOWN:
6698 src = ""; break;
6699 default: tl_assert(0);
6701 VG_(printf)
6702 ("Address %p len %lu not defined:\n"
6703 "Uninitialised value at %p%s\n",
6704 (void *)address, szB, (void *) bad_addr, src);
6705 ecu = otag & ~3;
6706 if (VG_(is_plausible_ECU)(ecu)) {
6707 origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
6708 VG_(pp_ExeContext)( origin_ec );
6711 else
6712 VG_(printf) ("Address %p len %lu defined\n",
6713 (void *)address, szB);
6714 // Describe this (probably live) address with current epoch
6715 MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
6716 break;
6717 default: tl_assert(0);
6719 return True;
6722 case 5: { /* block_list */
6723 HChar* wl;
6724 HChar *the_end;
6725 UInt lr_nr_from = 0;
6726 UInt lr_nr_to = 0;
6728 if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
6729 UInt limit_blocks = 999999999;
6730 Int int_value;
6731 UInt heuristics = 0;
6733 for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
6734 wl != NULL;
6735 wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6736 switch (VG_(keyword_id) ("unlimited limited heuristics ",
6737 wl, kwd_report_all)) {
6738 case -2: return True;
6739 case -1: return True;
6740 case 0: /* unlimited */
6741 limit_blocks = 999999999; break;
6742 case 1: /* limited */
6743 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6744 if (wcmd == NULL) {
6745 VG_(gdb_printf) ("missing integer value\n");
6746 return True;
6748 int_value = VG_(strtoll10) (wcmd, &the_end);
6749 if (*the_end != '\0') {
6750 VG_(gdb_printf) ("malformed integer value\n");
6751 return True;
6753 if (int_value <= 0) {
6754 VG_(gdb_printf) ("max_blocks must be >= 1,"
6755 " got %d\n", int_value);
6756 return True;
6758 limit_blocks = (UInt) int_value;
6759 break;
6760 case 2: /* heuristics */
6761 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6762 if (wcmd == NULL
6763 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6764 True,/*allow_all*/
6765 wcmd,
6766 &heuristics)) {
6767 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6768 return True;
6770 break;
6771 default:
6772 tl_assert (0);
6775 /* substract 1 from lr_nr_from/lr_nr_to as what is shown to the user
6776 is 1 more than the index in lr_array. */
6777 if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
6778 lr_nr_to-1,
6779 limit_blocks,
6780 heuristics))
6781 VG_(gdb_printf) ("invalid loss record nr\n");
6783 return True;
6786 case 6: { /* who_points_at */
6787 Addr address;
6788 SizeT szB = 1;
6790 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6791 return True;
6792 if (address == (Addr) 0) {
6793 VG_(gdb_printf) ("Cannot search who points at 0x0\n");
6794 return True;
6796 MC_(who_points_at) (address, szB);
6797 return True;
6800 case 7: { /* xb */
6801 Addr address;
6802 SizeT szB = 1;
6803 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6804 UChar vbits[8];
6805 Int res[8];
6806 Int i;
6807 Int unaddressable = 0;
6808 for (i = 0; i < szB; i++) {
6809 Int bnr = i % 8;
6810 res[bnr] = mc_get_or_set_vbits_for_client
6811 (address+i, (Addr) &vbits[bnr], 1,
6812 False, /* get them */
6813 False /* is client request */ );
6814 /* We going to print the first vabits of a new line.
6815 Terminate the previous line if needed: prints a line with the
6816 address and the data. */
6817 if (bnr == 0) {
6818 if (i != 0) {
6819 VG_(printf) ("\n");
6820 gdb_xb (address + i - 8, 8, res);
6822 VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
6824 if (res[bnr] == 1) {
6825 VG_(printf) ("\t %02x", vbits[bnr]);
6826 } else {
6827 tl_assert(3 == res[bnr]);
6828 unaddressable++;
6829 VG_(printf) ("\t __");
6832 VG_(printf) ("\n");
6833 if (szB % 8 == 0 && szB > 0)
6834 gdb_xb (address + szB - 8, 8, res);
6835 else
6836 gdb_xb (address + szB - szB % 8, szB % 8, res);
6837 if (unaddressable) {
6838 VG_(printf)
6839 ("Address %p len %lu has %d bytes unaddressable\n",
6840 (void *)address, szB, unaddressable);
6843 return True;
6846 case 8: { /* xtmemory */
6847 HChar* filename;
6848 filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
6849 MC_(xtmemory_report)(filename, False);
6850 return True;
6853 default:
6854 tl_assert(0);
6855 return False;
6859 /*------------------------------------------------------------*/
6860 /*--- Client requests ---*/
6861 /*------------------------------------------------------------*/
6863 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
6865 Int i;
6866 Addr bad_addr;
6868 if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
6869 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
6870 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
6871 && VG_USERREQ__FREELIKE_BLOCK != arg[0]
6872 && VG_USERREQ__CREATE_MEMPOOL != arg[0]
6873 && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
6874 && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
6875 && VG_USERREQ__MEMPOOL_FREE != arg[0]
6876 && VG_USERREQ__MEMPOOL_TRIM != arg[0]
6877 && VG_USERREQ__MOVE_MEMPOOL != arg[0]
6878 && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
6879 && VG_USERREQ__MEMPOOL_EXISTS != arg[0]
6880 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]
6881 && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
6882 && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
6883 return False;
6885 switch (arg[0]) {
6886 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
6887 Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
6888 if (!ok)
6889 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
6890 *ret = ok ? (UWord)NULL : bad_addr;
6891 break;
6894 case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
6895 Bool errorV = False;
6896 Addr bad_addrV = 0;
6897 UInt otagV = 0;
6898 Bool errorA = False;
6899 Addr bad_addrA = 0;
6900 is_mem_defined_comprehensive(
6901 arg[1], arg[2],
6902 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
6904 if (errorV) {
6905 MC_(record_user_error) ( tid, bad_addrV,
6906 /*isAddrErr*/False, otagV );
6908 if (errorA) {
6909 MC_(record_user_error) ( tid, bad_addrA,
6910 /*isAddrErr*/True, 0 );
6912 /* Return the lower of the two erring addresses, if any. */
6913 *ret = 0;
6914 if (errorV && !errorA) {
6915 *ret = bad_addrV;
6917 if (!errorV && errorA) {
6918 *ret = bad_addrA;
6920 if (errorV && errorA) {
6921 *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
6923 break;
6926 case VG_USERREQ__DO_LEAK_CHECK: {
6927 LeakCheckParams lcp;
6929 if (arg[1] == 0)
6930 lcp.mode = LC_Full;
6931 else if (arg[1] == 1)
6932 lcp.mode = LC_Summary;
6933 else {
6934 VG_(message)(Vg_UserMsg,
6935 "Warning: unknown memcheck leak search mode\n");
6936 lcp.mode = LC_Full;
6939 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
6940 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
6941 lcp.heuristics = MC_(clo_leak_check_heuristics);
6943 if (arg[2] == 0)
6944 lcp.deltamode = LCD_Any;
6945 else if (arg[2] == 1)
6946 lcp.deltamode = LCD_Increased;
6947 else if (arg[2] == 2)
6948 lcp.deltamode = LCD_Changed;
6949 else {
6950 VG_(message)
6951 (Vg_UserMsg,
6952 "Warning: unknown memcheck leak search deltamode\n");
6953 lcp.deltamode = LCD_Any;
6955 lcp.max_loss_records_output = 999999999;
6956 lcp.requested_by_monitor_command = False;
6957 lcp.xt_filename = NULL;
6959 MC_(detect_memory_leaks)(tid, &lcp);
6960 *ret = 0; /* return value is meaningless */
6961 break;
6964 case VG_USERREQ__MAKE_MEM_NOACCESS:
6965 MC_(make_mem_noaccess) ( arg[1], arg[2] );
6966 *ret = -1;
6967 break;
6969 case VG_USERREQ__MAKE_MEM_UNDEFINED:
6970 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
6971 MC_OKIND_USER );
6972 *ret = -1;
6973 break;
6975 case VG_USERREQ__MAKE_MEM_DEFINED:
6976 MC_(make_mem_defined) ( arg[1], arg[2] );
6977 *ret = -1;
6978 break;
6980 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
6981 make_mem_defined_if_addressable ( arg[1], arg[2] );
6982 *ret = -1;
6983 break;
6985 case VG_USERREQ__CREATE_BLOCK: /* describe a block */
6986 if (arg[1] != 0 && arg[2] != 0) {
6987 i = alloc_client_block();
6988 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
6989 cgbs[i].start = arg[1];
6990 cgbs[i].size = arg[2];
6991 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
6992 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
6993 *ret = i;
6994 } else
6995 *ret = -1;
6996 break;
6998 case VG_USERREQ__DISCARD: /* discard */
6999 if (cgbs == NULL
7000 || arg[2] >= cgb_used ||
7001 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
7002 *ret = 1;
7003 } else {
7004 tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
7005 cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
7006 VG_(free)(cgbs[arg[2]].desc);
7007 cgb_discards++;
7008 *ret = 0;
7010 break;
7012 case VG_USERREQ__GET_VBITS:
7013 *ret = mc_get_or_set_vbits_for_client
7014 ( arg[1], arg[2], arg[3],
7015 False /* get them */,
7016 True /* is client request */ );
7017 break;
7019 case VG_USERREQ__SET_VBITS:
7020 *ret = mc_get_or_set_vbits_for_client
7021 ( arg[1], arg[2], arg[3],
7022 True /* set them */,
7023 True /* is client request */ );
7024 break;
7026 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
7027 UWord** argp = (UWord**)arg;
7028 // MC_(bytes_leaked) et al were set by the last leak check (or zero
7029 // if no prior leak checks performed).
7030 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
7031 *argp[2] = MC_(bytes_dubious);
7032 *argp[3] = MC_(bytes_reachable);
7033 *argp[4] = MC_(bytes_suppressed);
7034 // there is no argp[5]
7035 //*argp[5] = MC_(bytes_indirect);
7036 // XXX need to make *argp[1-4] defined; currently done in the
7037 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
7038 *ret = 0;
7039 return True;
7041 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
7042 UWord** argp = (UWord**)arg;
7043 // MC_(blocks_leaked) et al were set by the last leak check (or zero
7044 // if no prior leak checks performed).
7045 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
7046 *argp[2] = MC_(blocks_dubious);
7047 *argp[3] = MC_(blocks_reachable);
7048 *argp[4] = MC_(blocks_suppressed);
7049 // there is no argp[5]
7050 //*argp[5] = MC_(blocks_indirect);
7051 // XXX need to make *argp[1-4] defined; currently done in the
7052 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
7053 *ret = 0;
7054 return True;
7056 case VG_USERREQ__MALLOCLIKE_BLOCK: {
7057 Addr p = (Addr)arg[1];
7058 SizeT sizeB = arg[2];
7059 UInt rzB = arg[3];
7060 Bool is_zeroed = (Bool)arg[4];
7062 MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
7063 MC_AllocCustom, MC_(malloc_list) );
7064 if (rzB > 0) {
7065 MC_(make_mem_noaccess) ( p - rzB, rzB);
7066 MC_(make_mem_noaccess) ( p + sizeB, rzB);
7068 return True;
7070 case VG_USERREQ__RESIZEINPLACE_BLOCK: {
7071 Addr p = (Addr)arg[1];
7072 SizeT oldSizeB = arg[2];
7073 SizeT newSizeB = arg[3];
7074 UInt rzB = arg[4];
7076 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
7077 return True;
7079 case VG_USERREQ__FREELIKE_BLOCK: {
7080 Addr p = (Addr)arg[1];
7081 UInt rzB = arg[2];
7083 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
7084 return True;
7087 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
7088 HChar* s = (HChar*)arg[1];
7089 Addr dst = (Addr) arg[2];
7090 Addr src = (Addr) arg[3];
7091 SizeT len = (SizeT)arg[4];
7092 MC_(record_overlap_error)(tid, s, src, dst, len);
7093 return True;
7096 case VG_USERREQ__CREATE_MEMPOOL: {
7097 Addr pool = (Addr)arg[1];
7098 UInt rzB = arg[2];
7099 Bool is_zeroed = (Bool)arg[3];
7100 UInt flags = arg[4];
7102 // The create_mempool function does not know these mempool flags,
7103 // pass as booleans.
7104 MC_(create_mempool) ( pool, rzB, is_zeroed,
7105 (flags & VALGRIND_MEMPOOL_AUTO_FREE),
7106 (flags & VALGRIND_MEMPOOL_METAPOOL) );
7107 return True;
7110 case VG_USERREQ__DESTROY_MEMPOOL: {
7111 Addr pool = (Addr)arg[1];
7113 MC_(destroy_mempool) ( pool );
7114 return True;
7117 case VG_USERREQ__MEMPOOL_ALLOC: {
7118 Addr pool = (Addr)arg[1];
7119 Addr addr = (Addr)arg[2];
7120 UInt size = arg[3];
7122 MC_(mempool_alloc) ( tid, pool, addr, size );
7123 return True;
7126 case VG_USERREQ__MEMPOOL_FREE: {
7127 Addr pool = (Addr)arg[1];
7128 Addr addr = (Addr)arg[2];
7130 MC_(mempool_free) ( pool, addr );
7131 return True;
7134 case VG_USERREQ__MEMPOOL_TRIM: {
7135 Addr pool = (Addr)arg[1];
7136 Addr addr = (Addr)arg[2];
7137 UInt size = arg[3];
7139 MC_(mempool_trim) ( pool, addr, size );
7140 return True;
7143 case VG_USERREQ__MOVE_MEMPOOL: {
7144 Addr poolA = (Addr)arg[1];
7145 Addr poolB = (Addr)arg[2];
7147 MC_(move_mempool) ( poolA, poolB );
7148 return True;
7151 case VG_USERREQ__MEMPOOL_CHANGE: {
7152 Addr pool = (Addr)arg[1];
7153 Addr addrA = (Addr)arg[2];
7154 Addr addrB = (Addr)arg[3];
7155 UInt size = arg[4];
7157 MC_(mempool_change) ( pool, addrA, addrB, size );
7158 return True;
7161 case VG_USERREQ__MEMPOOL_EXISTS: {
7162 Addr pool = (Addr)arg[1];
7164 *ret = (UWord) MC_(mempool_exists) ( pool );
7165 return True;
7168 case VG_USERREQ__GDB_MONITOR_COMMAND: {
7169 Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
7170 if (handled)
7171 *ret = 1;
7172 else
7173 *ret = 0;
7174 return handled;
7177 case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
7178 case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
7179 Bool addRange
7180 = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
7181 Bool ok
7182 = modify_ignore_ranges(addRange, arg[1], arg[2]);
7183 *ret = ok ? 1 : 0;
7184 return True;
7187 default:
7188 VG_(message)(
7189 Vg_UserMsg,
7190 "Warning: unknown memcheck client request code %llx\n",
7191 (ULong)arg[0]
7193 return False;
7195 return True;
7199 /*------------------------------------------------------------*/
7200 /*--- Crude profiling machinery. ---*/
7201 /*------------------------------------------------------------*/
7203 // We track a number of interesting events (using PROF_EVENT)
7204 // if MC_PROFILE_MEMORY is defined.
7206 #ifdef MC_PROFILE_MEMORY
7208 ULong MC_(event_ctr)[MCPE_LAST];
7210 /* Event counter names. Use the name of the function that increases the
7211 event counter. Drop any MC_() and mc_ prefices. */
7212 static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
7213 [MCPE_LOADVN_SLOW] = "LOADVn_slow",
7214 [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
7215 [MCPE_STOREVN_SLOW] = "STOREVn_slow",
7216 [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
7217 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
7218 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
7219 "make_aligned_word32_undefined_slow",
7220 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
7221 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
7222 "make_aligned_word64_undefined_slow",
7223 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
7224 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
7225 "make_aligned_word32_noaccess_slow",
7226 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
7227 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
7228 "make_aligned_word64_noaccess_slow",
7229 [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
7230 [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
7231 [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
7232 [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
7233 [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
7234 [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
7235 [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
7236 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
7237 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
7238 [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
7239 [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
7240 [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
7241 [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
7242 [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
7243 [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
7244 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
7245 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
7246 "is_mem_defined_comprehensive(loop)",
7247 [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
7248 [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
7249 [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
7250 [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
7251 [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
7252 [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
7253 "set_address_range_perms(single-secmap)",
7254 [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
7255 "set_address_range_perms(startof-secmap)",
7256 [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
7257 "set_address_range_perms(multiple-secmaps)",
7258 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
7259 "set_address_range_perms(dist-sm1)",
7260 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
7261 "set_address_range_perms(dist-sm2)",
7262 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
7263 "set_address_range_perms(dist-sm1-quick)",
7264 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
7265 "set_address_range_perms(dist-sm2-quick)",
7266 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
7267 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
7268 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
7269 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
7270 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
7271 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
7272 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
7273 "set_address_range_perms(loop64K-free-dist-sm)",
7274 [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
7275 [MCPE_LOADV_128_OR_256] = "LOADV_128_or_256",
7276 [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
7277 [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
7278 [MCPE_LOADV64] = "LOADV64",
7279 [MCPE_LOADV64_SLOW1] = "LOADV64-slow1",
7280 [MCPE_LOADV64_SLOW2] = "LOADV64-slow2",
7281 [MCPE_STOREV64] = "STOREV64",
7282 [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
7283 [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
7284 [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
7285 [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
7286 [MCPE_LOADV32] = "LOADV32",
7287 [MCPE_LOADV32_SLOW1] = "LOADV32-slow1",
7288 [MCPE_LOADV32_SLOW2] = "LOADV32-slow2",
7289 [MCPE_STOREV32] = "STOREV32",
7290 [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
7291 [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
7292 [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
7293 [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
7294 [MCPE_LOADV16] = "LOADV16",
7295 [MCPE_LOADV16_SLOW1] = "LOADV16-slow1",
7296 [MCPE_LOADV16_SLOW2] = "LOADV16-slow2",
7297 [MCPE_STOREV16] = "STOREV16",
7298 [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
7299 [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
7300 [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
7301 [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
7302 [MCPE_LOADV8] = "LOADV8",
7303 [MCPE_LOADV8_SLOW1] = "LOADV8-slow1",
7304 [MCPE_LOADV8_SLOW2] = "LOADV8-slow2",
7305 [MCPE_STOREV8] = "STOREV8",
7306 [MCPE_STOREV8_SLOW1] = "STOREV8-slow1",
7307 [MCPE_STOREV8_SLOW2] = "STOREV8-slow2",
7308 [MCPE_STOREV8_SLOW3] = "STOREV8-slow3",
7309 [MCPE_STOREV8_SLOW4] = "STOREV8-slow4",
7310 [MCPE_NEW_MEM_STACK_4] = "new_mem_stack_4",
7311 [MCPE_NEW_MEM_STACK_8] = "new_mem_stack_8",
7312 [MCPE_NEW_MEM_STACK_12] = "new_mem_stack_12",
7313 [MCPE_NEW_MEM_STACK_16] = "new_mem_stack_16",
7314 [MCPE_NEW_MEM_STACK_32] = "new_mem_stack_32",
7315 [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
7316 [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
7317 [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
7318 [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
7319 [MCPE_DIE_MEM_STACK_4] = "die_mem_stack_4",
7320 [MCPE_DIE_MEM_STACK_8] = "die_mem_stack_8",
7321 [MCPE_DIE_MEM_STACK_12] = "die_mem_stack_12",
7322 [MCPE_DIE_MEM_STACK_16] = "die_mem_stack_16",
7323 [MCPE_DIE_MEM_STACK_32] = "die_mem_stack_32",
7324 [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
7325 [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
7326 [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
7327 [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
7328 [MCPE_NEW_MEM_STACK] = "new_mem_stack",
7329 [MCPE_DIE_MEM_STACK] = "die_mem_stack",
7330 [MCPE_MAKE_STACK_UNINIT_W_O] = "MAKE_STACK_UNINIT_w_o",
7331 [MCPE_MAKE_STACK_UNINIT_NO_O] = "MAKE_STACK_UNINIT_no_o",
7332 [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
7333 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
7334 = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
7335 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
7336 = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
7337 [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
7338 = "MAKE_STACK_UNINIT_128_no_o_slowcase",
7341 static void init_prof_mem ( void )
7343 Int i, name_count = 0;
7345 for (i = 0; i < MCPE_LAST; i++) {
7346 MC_(event_ctr)[i] = 0;
7347 if (MC_(event_ctr_name)[i] != NULL)
7348 ++name_count;
7351 /* Make sure every profiling event has a name */
7352 tl_assert(name_count == MCPE_LAST);
7355 static void done_prof_mem ( void )
7357 Int i, n;
7358 Bool spaced = False;
7359 for (i = n = 0; i < MCPE_LAST; i++) {
7360 if (!spaced && (n % 10) == 0) {
7361 VG_(printf)("\n");
7362 spaced = True;
7364 if (MC_(event_ctr)[i] > 0) {
7365 spaced = False;
7366 ++n;
7367 VG_(printf)( "prof mem event %3d: %11llu %s\n",
7368 i, MC_(event_ctr)[i],
7369 MC_(event_ctr_name)[i]);
7374 #else
7376 static void init_prof_mem ( void ) { }
7377 static void done_prof_mem ( void ) { }
7379 #endif
7382 /*------------------------------------------------------------*/
7383 /*--- Origin tracking stuff ---*/
7384 /*------------------------------------------------------------*/
7386 /*--------------------------------------------*/
7387 /*--- Origin tracking: load handlers ---*/
7388 /*--------------------------------------------*/
7390 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
7391 return or1 > or2 ? or1 : or2;
7394 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
7395 OCacheLine* line;
7396 UChar descr;
7397 UWord lineoff = oc_line_offset(a);
7398 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7400 if (OC_ENABLE_ASSERTIONS) {
7401 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7404 line = find_OCacheLine( a );
7406 descr = line->descr[lineoff];
7407 if (OC_ENABLE_ASSERTIONS) {
7408 tl_assert(descr < 0x10);
7411 if (LIKELY(0 == (descr & (1 << byteoff)))) {
7412 return 0;
7413 } else {
7414 return line->w32[lineoff];
7418 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
7419 OCacheLine* line;
7420 UChar descr;
7421 UWord lineoff, byteoff;
7423 if (UNLIKELY(a & 1)) {
7424 /* Handle misaligned case, slowly. */
7425 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
7426 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
7427 return merge_origins(oLo, oHi);
7430 lineoff = oc_line_offset(a);
7431 byteoff = a & 3; /* 0 or 2 */
7433 if (OC_ENABLE_ASSERTIONS) {
7434 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7436 line = find_OCacheLine( a );
7438 descr = line->descr[lineoff];
7439 if (OC_ENABLE_ASSERTIONS) {
7440 tl_assert(descr < 0x10);
7443 if (LIKELY(0 == (descr & (3 << byteoff)))) {
7444 return 0;
7445 } else {
7446 return line->w32[lineoff];
7450 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
7451 OCacheLine* line;
7452 UChar descr;
7453 UWord lineoff;
7455 if (UNLIKELY(a & 3)) {
7456 /* Handle misaligned case, slowly. */
7457 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
7458 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
7459 return merge_origins(oLo, oHi);
7462 lineoff = oc_line_offset(a);
7463 if (OC_ENABLE_ASSERTIONS) {
7464 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7467 line = find_OCacheLine( a );
7469 descr = line->descr[lineoff];
7470 if (OC_ENABLE_ASSERTIONS) {
7471 tl_assert(descr < 0x10);
7474 if (LIKELY(0 == descr)) {
7475 return 0;
7476 } else {
7477 return line->w32[lineoff];
7481 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
7482 OCacheLine* line;
7483 UChar descrLo, descrHi, descr;
7484 UWord lineoff;
7486 if (UNLIKELY(a & 7)) {
7487 /* Handle misaligned case, slowly. */
7488 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
7489 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
7490 return merge_origins(oLo, oHi);
7493 lineoff = oc_line_offset(a);
7494 if (OC_ENABLE_ASSERTIONS) {
7495 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7498 line = find_OCacheLine( a );
7500 descrLo = line->descr[lineoff + 0];
7501 descrHi = line->descr[lineoff + 1];
7502 descr = descrLo | descrHi;
7503 if (OC_ENABLE_ASSERTIONS) {
7504 tl_assert(descr < 0x10);
7507 if (LIKELY(0 == descr)) {
7508 return 0; /* both 32-bit chunks are defined */
7509 } else {
7510 UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
7511 UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
7512 return merge_origins(oLo, oHi);
7516 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
7517 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 );
7518 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 );
7519 UInt oBoth = merge_origins(oLo, oHi);
7520 return (UWord)oBoth;
7523 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
7524 UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 );
7525 UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 );
7526 UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 );
7527 UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 );
7528 UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
7529 merge_origins(oQ2, oQ3));
7530 return (UWord)oAll;
7534 /*--------------------------------------------*/
7535 /*--- Origin tracking: store handlers ---*/
7536 /*--------------------------------------------*/
7538 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
7539 OCacheLine* line;
7540 UWord lineoff = oc_line_offset(a);
7541 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7543 if (OC_ENABLE_ASSERTIONS) {
7544 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7547 line = find_OCacheLine( a );
7549 if (d32 == 0) {
7550 line->descr[lineoff] &= ~(1 << byteoff);
7551 } else {
7552 line->descr[lineoff] |= (1 << byteoff);
7553 line->w32[lineoff] = d32;
7557 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
7558 OCacheLine* line;
7559 UWord lineoff, byteoff;
7561 if (UNLIKELY(a & 1)) {
7562 /* Handle misaligned case, slowly. */
7563 MC_(helperc_b_store1)( a + 0, d32 );
7564 MC_(helperc_b_store1)( a + 1, d32 );
7565 return;
7568 lineoff = oc_line_offset(a);
7569 byteoff = a & 3; /* 0 or 2 */
7571 if (OC_ENABLE_ASSERTIONS) {
7572 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7575 line = find_OCacheLine( a );
7577 if (d32 == 0) {
7578 line->descr[lineoff] &= ~(3 << byteoff);
7579 } else {
7580 line->descr[lineoff] |= (3 << byteoff);
7581 line->w32[lineoff] = d32;
7585 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
7586 OCacheLine* line;
7587 UWord lineoff;
7589 if (UNLIKELY(a & 3)) {
7590 /* Handle misaligned case, slowly. */
7591 MC_(helperc_b_store2)( a + 0, d32 );
7592 MC_(helperc_b_store2)( a + 2, d32 );
7593 return;
7596 lineoff = oc_line_offset(a);
7597 if (OC_ENABLE_ASSERTIONS) {
7598 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7601 line = find_OCacheLine( a );
7603 if (d32 == 0) {
7604 line->descr[lineoff] = 0;
7605 } else {
7606 line->descr[lineoff] = 0xF;
7607 line->w32[lineoff] = d32;
7611 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
7612 OCacheLine* line;
7613 UWord lineoff;
7615 if (UNLIKELY(a & 7)) {
7616 /* Handle misaligned case, slowly. */
7617 MC_(helperc_b_store4)( a + 0, d32 );
7618 MC_(helperc_b_store4)( a + 4, d32 );
7619 return;
7622 lineoff = oc_line_offset(a);
7623 if (OC_ENABLE_ASSERTIONS) {
7624 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7627 line = find_OCacheLine( a );
7629 if (d32 == 0) {
7630 line->descr[lineoff + 0] = 0;
7631 line->descr[lineoff + 1] = 0;
7632 } else {
7633 line->descr[lineoff + 0] = 0xF;
7634 line->descr[lineoff + 1] = 0xF;
7635 line->w32[lineoff + 0] = d32;
7636 line->w32[lineoff + 1] = d32;
7640 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
7641 MC_(helperc_b_store8)( a + 0, d32 );
7642 MC_(helperc_b_store8)( a + 8, d32 );
7645 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
7646 MC_(helperc_b_store8)( a + 0, d32 );
7647 MC_(helperc_b_store8)( a + 8, d32 );
7648 MC_(helperc_b_store8)( a + 16, d32 );
7649 MC_(helperc_b_store8)( a + 24, d32 );
7653 /*--------------------------------------------*/
7654 /*--- Origin tracking: sarp handlers ---*/
7655 /*--------------------------------------------*/
7657 __attribute__((noinline))
7658 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
7659 if ((a & 1) && len >= 1) {
7660 MC_(helperc_b_store1)( a, otag );
7661 a++;
7662 len--;
7664 if ((a & 2) && len >= 2) {
7665 MC_(helperc_b_store2)( a, otag );
7666 a += 2;
7667 len -= 2;
7669 if (len >= 4)
7670 tl_assert(0 == (a & 3));
7671 while (len >= 4) {
7672 MC_(helperc_b_store4)( a, otag );
7673 a += 4;
7674 len -= 4;
7676 if (len >= 2) {
7677 MC_(helperc_b_store2)( a, otag );
7678 a += 2;
7679 len -= 2;
7681 if (len >= 1) {
7682 MC_(helperc_b_store1)( a, otag );
7683 //a++;
7684 len--;
7686 tl_assert(len == 0);
7689 __attribute__((noinline))
7690 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
7691 if ((a & 1) && len >= 1) {
7692 MC_(helperc_b_store1)( a, 0 );
7693 a++;
7694 len--;
7696 if ((a & 2) && len >= 2) {
7697 MC_(helperc_b_store2)( a, 0 );
7698 a += 2;
7699 len -= 2;
7701 if (len >= 4)
7702 tl_assert(0 == (a & 3));
7703 while (len >= 4) {
7704 MC_(helperc_b_store4)( a, 0 );
7705 a += 4;
7706 len -= 4;
7708 if (len >= 2) {
7709 MC_(helperc_b_store2)( a, 0 );
7710 a += 2;
7711 len -= 2;
7713 if (len >= 1) {
7714 MC_(helperc_b_store1)( a, 0 );
7715 //a++;
7716 len--;
7718 tl_assert(len == 0);
7722 /*------------------------------------------------------------*/
7723 /*--- Setup and finalisation ---*/
7724 /*------------------------------------------------------------*/
7726 static void mc_post_clo_init ( void )
7728 /* If we've been asked to emit XML, mash around various other
7729 options so as to constrain the output somewhat. */
7730 if (VG_(clo_xml)) {
7731 /* Extract as much info as possible from the leak checker. */
7732 MC_(clo_leak_check) = LC_Full;
7735 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)
7736 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7737 VG_(message)(Vg_UserMsg,
7738 "Warning: --freelist-big-blocks value %lld has no effect\n"
7739 "as it is >= to --freelist-vol value %lld\n",
7740 MC_(clo_freelist_big_blocks),
7741 MC_(clo_freelist_vol));
7744 if (MC_(clo_workaround_gcc296_bugs)
7745 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7746 VG_(umsg)(
7747 "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
7748 "Warning: Instead use: --ignore-range-below-sp=1024-1\n"
7749 "\n"
7753 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
7755 if (MC_(clo_mc_level) == 3) {
7756 /* We're doing origin tracking. */
7757 # ifdef PERF_FAST_STACK
7758 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU );
7759 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU );
7760 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU );
7761 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU );
7762 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU );
7763 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
7764 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
7765 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
7766 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
7767 # endif
7768 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU );
7769 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_make_ECU );
7770 } else {
7771 /* Not doing origin tracking */
7772 # ifdef PERF_FAST_STACK
7773 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
7774 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 );
7775 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 );
7776 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 );
7777 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 );
7778 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
7779 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
7780 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
7781 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
7782 # endif
7783 VG_(track_new_mem_stack) ( mc_new_mem_stack );
7784 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
7787 // We assume that brk()/sbrk() does not initialise new memory. Is this
7788 // accurate? John Reiser says:
7790 // 0) sbrk() can *decrease* process address space. No zero fill is done
7791 // for a decrease, not even the fragment on the high end of the last page
7792 // that is beyond the new highest address. For maximum safety and
7793 // portability, then the bytes in the last page that reside above [the
7794 // new] sbrk(0) should be considered to be uninitialized, but in practice
7795 // it is exceedingly likely that they will retain their previous
7796 // contents.
7798 // 1) If an increase is large enough to require new whole pages, then
7799 // those new whole pages (like all new pages) are zero-filled by the
7800 // operating system. So if sbrk(0) already is page aligned, then
7801 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
7803 // 2) Any increase that lies within an existing allocated page is not
7804 // changed. So if (x = sbrk(0)) is not page aligned, then
7805 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
7806 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
7807 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
7808 // of them come along for the ride because the operating system deals
7809 // only in whole pages. Again, for maximum safety and portability, then
7810 // anything that lives above [the new] sbrk(0) should be considered
7811 // uninitialized, but in practice will retain previous contents [zero in
7812 // this case.]"
7814 // In short:
7816 // A key property of sbrk/brk is that new whole pages that are supplied
7817 // by the operating system *do* get initialized to zero.
7819 // As for the portability of all this:
7821 // sbrk and brk are not POSIX. However, any system that is a derivative
7822 // of *nix has sbrk and brk because there are too many software (such as
7823 // the Bourne shell) which rely on the traditional memory map (.text,
7824 // .data+.bss, stack) and the existence of sbrk/brk.
7826 // So we should arguably observe all this. However:
7827 // - The current inaccuracy has caused maybe one complaint in seven years(?)
7828 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
7829 // doubt most programmers know the above information.
7830 // So I'm not terribly unhappy with marking it as undefined. --njn.
7832 // [More: I think most of what John said only applies to sbrk(). It seems
7833 // that brk() always deals in whole pages. And since this event deals
7834 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
7835 // just mark all memory it allocates as defined.]
7837 # if !defined(VGO_solaris)
7838 if (MC_(clo_mc_level) == 3)
7839 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_make_ECU );
7840 else
7841 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_no_ECU );
7842 # else
7843 // On Solaris, brk memory has to be marked as defined, otherwise we get
7844 // many false positives.
7845 VG_(track_new_mem_brk) ( make_mem_defined_w_tid );
7846 # endif
7848 /* This origin tracking cache is huge (~100M), so only initialise
7849 if we need it. */
7850 if (MC_(clo_mc_level) >= 3) {
7851 init_OCache();
7852 tl_assert(ocacheL1 != NULL);
7853 tl_assert(ocacheL2 != NULL);
7854 } else {
7855 tl_assert(ocacheL1 == NULL);
7856 tl_assert(ocacheL2 == NULL);
7859 MC_(chunk_poolalloc) = VG_(newPA)
7860 (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
7861 1000,
7862 VG_(malloc),
7863 "mc.cMC.1 (MC_Chunk pools)",
7864 VG_(free));
7866 /* Do not check definedness of guest state if --undef-value-errors=no */
7867 if (MC_(clo_mc_level) >= 2)
7868 VG_(track_pre_reg_read) ( mc_pre_reg_read );
7870 if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
7871 if (MC_(clo_keep_stacktraces) == KS_none
7872 || MC_(clo_keep_stacktraces) == KS_free)
7873 VG_(fmsg_bad_option)("--keep-stacktraces",
7874 "To use --xtree-memory=full, you must"
7875 " keep at least the alloc stacktrace\n");
7876 // Activate full xtree memory profiling.
7877 VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main));
7882 static void print_SM_info(const HChar* type, Int n_SMs)
7884 VG_(message)(Vg_DebugMsg,
7885 " memcheck: SMs: %s = %d (%luk, %luM)\n",
7886 type,
7887 n_SMs,
7888 n_SMs * sizeof(SecMap) / 1024UL,
7889 n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
7892 static void mc_print_stats (void)
7894 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
7896 VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
7897 VG_(free_queue_volume), VG_(free_queue_length));
7898 VG_(message)(Vg_DebugMsg,
7899 " memcheck: sanity checks: %d cheap, %d expensive\n",
7900 n_sanity_cheap, n_sanity_expensive );
7901 VG_(message)(Vg_DebugMsg,
7902 " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
7903 n_auxmap_L2_nodes,
7904 n_auxmap_L2_nodes * 64,
7905 n_auxmap_L2_nodes / 16 );
7906 VG_(message)(Vg_DebugMsg,
7907 " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
7908 n_auxmap_L1_searches, n_auxmap_L1_cmps,
7909 (10ULL * n_auxmap_L1_cmps)
7910 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
7912 VG_(message)(Vg_DebugMsg,
7913 " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
7914 n_auxmap_L2_searches, n_auxmap_L2_nodes
7917 print_SM_info("n_issued ", n_issued_SMs);
7918 print_SM_info("n_deissued ", n_deissued_SMs);
7919 print_SM_info("max_noaccess ", max_noaccess_SMs);
7920 print_SM_info("max_undefined", max_undefined_SMs);
7921 print_SM_info("max_defined ", max_defined_SMs);
7922 print_SM_info("max_non_DSM ", max_non_DSM_SMs);
7924 // Three DSMs, plus the non-DSM ones
7925 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
7926 // The 3*sizeof(Word) bytes is the AVL node metadata size.
7927 // The VG_ROUNDUP is because the OSet pool allocator will/must align
7928 // the elements on pointer size.
7929 // Note that the pool allocator has some additional small overhead
7930 // which is not counted in the below.
7931 // Hardwiring this logic sucks, but I don't see how else to do it.
7932 max_secVBit_szB = max_secVBit_nodes *
7933 (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
7934 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
7936 VG_(message)(Vg_DebugMsg,
7937 " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
7938 max_secVBit_nodes, max_secVBit_szB / 1024,
7939 max_secVBit_szB / (1024 * 1024));
7940 VG_(message)(Vg_DebugMsg,
7941 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
7942 sec_vbits_new_nodes + sec_vbits_updates,
7943 sec_vbits_new_nodes, sec_vbits_updates );
7944 VG_(message)(Vg_DebugMsg,
7945 " memcheck: max shadow mem size: %luk, %luM\n",
7946 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
7948 if (MC_(clo_mc_level) >= 3) {
7949 VG_(message)(Vg_DebugMsg,
7950 " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
7951 stats_ocacheL1_find,
7952 stats_ocacheL1_misses,
7953 stats_ocacheL1_lossage );
7954 VG_(message)(Vg_DebugMsg,
7955 " ocacheL1: %'12lu at 0 %'12lu at 1\n",
7956 stats_ocacheL1_find - stats_ocacheL1_misses
7957 - stats_ocacheL1_found_at_1
7958 - stats_ocacheL1_found_at_N,
7959 stats_ocacheL1_found_at_1 );
7960 VG_(message)(Vg_DebugMsg,
7961 " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
7962 stats_ocacheL1_found_at_N,
7963 stats_ocacheL1_movefwds );
7964 VG_(message)(Vg_DebugMsg,
7965 " ocacheL1: %'12lu sizeB %'12d useful\n",
7966 (SizeT)sizeof(OCache),
7967 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
7968 VG_(message)(Vg_DebugMsg,
7969 " ocacheL2: %'12lu refs %'12lu misses\n",
7970 stats__ocacheL2_refs,
7971 stats__ocacheL2_misses );
7972 VG_(message)(Vg_DebugMsg,
7973 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
7974 stats__ocacheL2_n_nodes_max,
7975 stats__ocacheL2_n_nodes );
7976 VG_(message)(Vg_DebugMsg,
7977 " niacache: %'12lu refs %'12lu misses\n",
7978 stats__nia_cache_queries, stats__nia_cache_misses);
7979 } else {
7980 tl_assert(ocacheL1 == NULL);
7981 tl_assert(ocacheL2 == NULL);
7986 static void mc_fini ( Int exitcode )
7988 MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True);
7989 MC_(print_malloc_stats)();
7991 if (MC_(clo_leak_check) != LC_Off) {
7992 LeakCheckParams lcp;
7993 HChar* xt_filename = NULL;
7994 lcp.mode = MC_(clo_leak_check);
7995 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
7996 lcp.heuristics = MC_(clo_leak_check_heuristics);
7997 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
7998 lcp.deltamode = LCD_Any;
7999 lcp.max_loss_records_output = 999999999;
8000 lcp.requested_by_monitor_command = False;
8001 if (MC_(clo_xtree_leak)) {
8002 xt_filename = VG_(expand_file_name)("--xtree-leak-file",
8003 MC_(clo_xtree_leak_file));
8004 lcp.xt_filename = xt_filename;
8005 lcp.mode = LC_Full;
8006 lcp.show_leak_kinds = MC_(all_Reachedness)();
8008 else
8009 lcp.xt_filename = NULL;
8010 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
8011 if (MC_(clo_xtree_leak))
8012 VG_(free)(xt_filename);
8013 } else {
8014 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8015 VG_(umsg)(
8016 "For a detailed leak analysis, rerun with: --leak-check=full\n"
8017 "\n"
8022 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
8023 && MC_(clo_mc_level) == 2) {
8024 VG_(message)(Vg_UserMsg,
8025 "Use --track-origins=yes to see where "
8026 "uninitialised values come from\n");
8029 /* Print a warning if any client-request generated ignore-ranges
8030 still exist. It would be reasonable to expect that a properly
8031 written program would remove any such ranges before exiting, and
8032 since they are a bit on the dangerous side, let's comment. By
8033 contrast ranges which are specified on the command line normally
8034 pertain to hardware mapped into the address space, and so we
8035 can't expect the client to have got rid of them. */
8036 if (gIgnoredAddressRanges) {
8037 UInt i, nBad = 0;
8038 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
8039 UWord val = IAR_INVALID;
8040 UWord key_min = ~(UWord)0;
8041 UWord key_max = (UWord)0;
8042 VG_(indexRangeMap)( &key_min, &key_max, &val,
8043 gIgnoredAddressRanges, i );
8044 if (val != IAR_ClientReq)
8045 continue;
8046 /* Print the offending range. Also, if it is the first,
8047 print a banner before it. */
8048 nBad++;
8049 if (nBad == 1) {
8050 VG_(umsg)(
8051 "WARNING: exiting program has the following client-requested\n"
8052 "WARNING: address error disablement range(s) still in force,\n"
8053 "WARNING: "
8054 "possibly as a result of some mistake in the use of the\n"
8055 "WARNING: "
8056 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
8059 VG_(umsg)(" [%u] 0x%016lx-0x%016lx %s\n",
8060 i, key_min, key_max, showIARKind(val));
8064 done_prof_mem();
8066 if (VG_(clo_stats))
8067 mc_print_stats();
8069 if (0) {
8070 VG_(message)(Vg_DebugMsg,
8071 "------ Valgrind's client block stats follow ---------------\n" );
8072 show_client_block_stats();
8076 /* mark the given addr/len unaddressable for watchpoint implementation
8077 The PointKind will be handled at access time */
8078 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
8079 Addr addr, SizeT len)
8081 /* GDBTD this is somewhat fishy. We might rather have to save the previous
8082 accessibility and definedness in gdbserver so as to allow restoring it
8083 properly. Currently, we assume that the user only watches things
8084 which are properly addressable and defined */
8085 if (insert)
8086 MC_(make_mem_noaccess) (addr, len);
8087 else
8088 MC_(make_mem_defined) (addr, len);
8089 return True;
8092 static void mc_pre_clo_init(void)
8094 VG_(details_name) ("Memcheck");
8095 VG_(details_version) (NULL);
8096 VG_(details_description) ("a memory error detector");
8097 VG_(details_copyright_author)(
8098 "Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.");
8099 VG_(details_bug_reports_to) (VG_BUGS_TO);
8100 VG_(details_avg_translation_sizeB) ( 640 );
8102 VG_(basic_tool_funcs) (mc_post_clo_init,
8103 MC_(instrument),
8104 mc_fini);
8106 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) );
8109 VG_(needs_core_errors) ();
8110 VG_(needs_tool_errors) (MC_(eq_Error),
8111 MC_(before_pp_Error),
8112 MC_(pp_Error),
8113 True,/*show TIDs for errors*/
8114 MC_(update_Error_extra),
8115 MC_(is_recognised_suppression),
8116 MC_(read_extra_suppression_info),
8117 MC_(error_matches_suppression),
8118 MC_(get_error_name),
8119 MC_(get_extra_suppression_info),
8120 MC_(print_extra_suppression_use),
8121 MC_(update_extra_suppression_use));
8122 VG_(needs_libc_freeres) ();
8123 VG_(needs_cxx_freeres) ();
8124 VG_(needs_command_line_options)(mc_process_cmd_line_options,
8125 mc_print_usage,
8126 mc_print_debug_usage);
8127 VG_(needs_client_requests) (mc_handle_client_request);
8128 VG_(needs_sanity_checks) (mc_cheap_sanity_check,
8129 mc_expensive_sanity_check);
8130 VG_(needs_print_stats) (mc_print_stats);
8131 VG_(needs_info_location) (MC_(pp_describe_addr));
8132 VG_(needs_malloc_replacement) (MC_(malloc),
8133 MC_(__builtin_new),
8134 MC_(__builtin_vec_new),
8135 MC_(memalign),
8136 MC_(calloc),
8137 MC_(free),
8138 MC_(__builtin_delete),
8139 MC_(__builtin_vec_delete),
8140 MC_(realloc),
8141 MC_(malloc_usable_size),
8142 MC_MALLOC_DEFAULT_REDZONE_SZB );
8143 MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
8145 VG_(needs_xml_output) ();
8147 VG_(track_new_mem_startup) ( mc_new_mem_startup );
8149 // Handling of mmap and mprotect isn't simple (well, it is simple,
8150 // but the justification isn't.) See comments above, just prior to
8151 // mc_new_mem_mmap.
8152 VG_(track_new_mem_mmap) ( mc_new_mem_mmap );
8153 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
8155 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) );
8157 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
8158 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) );
8159 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) );
8161 /* Defer the specification of the new_mem_stack functions to the
8162 post_clo_init function, since we need to first parse the command
8163 line before deciding which set to use. */
8165 # ifdef PERF_FAST_STACK
8166 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
8167 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 );
8168 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 );
8169 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 );
8170 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 );
8171 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 );
8172 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 );
8173 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 );
8174 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 );
8175 # endif
8176 VG_(track_die_mem_stack) ( mc_die_mem_stack );
8178 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) );
8180 VG_(track_pre_mem_read) ( check_mem_is_defined );
8181 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
8182 VG_(track_pre_mem_write) ( check_mem_is_addressable );
8183 VG_(track_post_mem_write) ( mc_post_mem_write );
8185 VG_(track_post_reg_write) ( mc_post_reg_write );
8186 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
8188 if (MC_(clo_mc_level) >= 2) {
8189 VG_(track_copy_mem_to_reg) ( mc_copy_mem_to_reg );
8190 VG_(track_copy_reg_to_mem) ( mc_copy_reg_to_mem );
8193 VG_(needs_watchpoint) ( mc_mark_unaddressable_for_watchpoint );
8195 init_shadow_memory();
8196 // MC_(chunk_poolalloc) must be allocated in post_clo_init
8197 tl_assert(MC_(chunk_poolalloc) == NULL);
8198 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" );
8199 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
8200 init_prof_mem();
8202 tl_assert( mc_expensive_sanity_check() );
8204 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
8205 tl_assert(sizeof(UWord) == sizeof(Addr));
8206 // Call me paranoid. I don't care.
8207 tl_assert(sizeof(void*) == sizeof(Addr));
8209 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
8210 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
8212 /* This is small. Always initialise it. */
8213 init_nia_to_ecu_cache();
8215 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
8216 if we need to, since the command line args haven't been
8217 processed yet. Hence defer it to mc_post_clo_init. */
8218 tl_assert(ocacheL1 == NULL);
8219 tl_assert(ocacheL2 == NULL);
8221 /* Check some important stuff. See extensive comments above
8222 re UNALIGNED_OR_HIGH for background. */
8223 # if VG_WORDSIZE == 4
8224 tl_assert(sizeof(void*) == 4);
8225 tl_assert(sizeof(Addr) == 4);
8226 tl_assert(sizeof(UWord) == 4);
8227 tl_assert(sizeof(Word) == 4);
8228 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
8229 tl_assert(MASK(1) == 0UL);
8230 tl_assert(MASK(2) == 1UL);
8231 tl_assert(MASK(4) == 3UL);
8232 tl_assert(MASK(8) == 7UL);
8233 # else
8234 tl_assert(VG_WORDSIZE == 8);
8235 tl_assert(sizeof(void*) == 8);
8236 tl_assert(sizeof(Addr) == 8);
8237 tl_assert(sizeof(UWord) == 8);
8238 tl_assert(sizeof(Word) == 8);
8239 tl_assert(MAX_PRIMARY_ADDRESS == 0x1FFFFFFFFFULL);
8240 tl_assert(MASK(1) == 0xFFFFFFE000000000ULL);
8241 tl_assert(MASK(2) == 0xFFFFFFE000000001ULL);
8242 tl_assert(MASK(4) == 0xFFFFFFE000000003ULL);
8243 tl_assert(MASK(8) == 0xFFFFFFE000000007ULL);
8244 # endif
8246 /* Check some assertions to do with the instrumentation machinery. */
8247 MC_(do_instrumentation_startup_checks)();
8250 STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));
8252 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
8254 /*--------------------------------------------------------------------*/
8255 /*--- end mc_main.c ---*/
8256 /*--------------------------------------------------------------------*/