1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
5 /*--- accessibility (A) and validity (V) status of each byte. ---*/
6 /*--- mc_main.c ---*/
7 /*--------------------------------------------------------------------*/
9 /*
10 This file is part of MemCheck, a heavyweight Valgrind tool for
11 detecting memory errors.
13 Copyright (C) 2000-2017 Julian Seward
14 jseward@acm.org
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, write to the Free Software
28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 02111-1307, USA.
31 The GNU General Public License is contained in the file COPYING.
34 #include "pub_tool_basics.h"
35 #include "pub_tool_aspacemgr.h"
36 #include "pub_tool_gdbserver.h"
37 #include "pub_tool_poolalloc.h"
38 #include "pub_tool_hashtable.h" // For mc_include.h
39 #include "pub_tool_libcbase.h"
40 #include "pub_tool_libcassert.h"
41 #include "pub_tool_libcprint.h"
42 #include "pub_tool_machine.h"
43 #include "pub_tool_mallocfree.h"
44 #include "pub_tool_options.h"
45 #include "pub_tool_oset.h"
46 #include "pub_tool_rangemap.h"
47 #include "pub_tool_replacemalloc.h"
48 #include "pub_tool_tooliface.h"
49 #include "pub_tool_threadstate.h"
50 #include "pub_tool_xarray.h"
51 #include "pub_tool_xtree.h"
52 #include "pub_tool_xtmemory.h"
54 #include "mc_include.h"
55 #include "memcheck.h" /* for client requests */
58 /* Set to 1 to enable handwritten assembly helpers on targets for
59 which it is supported. */
60 #define ENABLE_ASSEMBLY_HELPERS 1
62 /* Set to 1 to do a little more sanity checking */
63 #define VG_DEBUG_MEMORY 0
65 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
67 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
68 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
71 /*------------------------------------------------------------*/
72 /*--- Fast-case knobs ---*/
73 /*------------------------------------------------------------*/
75 // Comment these out to disable the fast cases (don't just set them to zero).
77 #define PERF_FAST_LOADV 1
78 #define PERF_FAST_STOREV 1
80 #define PERF_FAST_SARP 1
82 #define PERF_FAST_STACK 1
83 #define PERF_FAST_STACK2 1
85 /* Change this to 1 to enable assertions on origin tracking cache fast
86 paths */
87 #define OC_ENABLE_ASSERTIONS 0
90 /*------------------------------------------------------------*/
91 /*--- Comments on the origin tracking implementation ---*/
92 /*------------------------------------------------------------*/
94 /* See detailed comment entitled
95 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
96 which is contained further on in this file. */
99 /*------------------------------------------------------------*/
100 /*--- V bits and A bits ---*/
101 /*------------------------------------------------------------*/
103 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
104 thinks the corresponding value bit is defined. And every memory byte
105 has an A bit, which tracks whether Memcheck thinks the program can access
106 it safely (ie. it's mapped, and has at least one of the RWX permission bits
107 set). So every N-bit register is shadowed with N V bits, and every memory
108 byte is shadowed with 8 V bits and one A bit.
110 In the implementation, we use two forms of compression (compressed V bits
111 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
112 for memory.
114 Memcheck also tracks extra information about each heap block that is
115 allocated, for detecting memory leaks and other purposes.
118 /*------------------------------------------------------------*/
119 /*--- Basic A/V bitmap representation. ---*/
120 /*------------------------------------------------------------*/
122 /* All reads and writes are checked against a memory map (a.k.a. shadow
123 memory), which records the state of all memory in the process.
125 On 32-bit machines the memory map is organised as follows.
126 The top 16 bits of an address are used to index into a top-level
127 map table, containing 65536 entries. Each entry is a pointer to a
128 second-level map, which records the accessibility and validity
129 permissions for the 65536 bytes indexed by the lower 16 bits of the
130 address. Each byte is represented by two bits (details are below). So
131 each second-level map contains 16384 bytes. This two-level arrangement
132 conveniently divides the 4G address space into 64k lumps, each size 64k
133 bytes.
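   For example, for the 32-bit address 0xb3a1f724, the top 16 bits (0xb3a1)
   select the primary map entry and the low 16 bits (0xf724) locate the byte
   within that 64k lump; with the 2-bits-per-byte encoding described below,
   its state lives in byte (0xf724 >> 2) == 0x3dc9 of the secondary map, in
   bits [1..0], since (0xf724 & 3) == 0.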
135 All entries in the primary (top-level) map must point to a valid
136 secondary (second-level) map. Since many of the 64kB chunks will
137 have the same status for every bit -- ie. noaccess (for unused
138 address space) or entirely addressable and defined (for code segments) --
139 there are three distinguished secondary maps, which indicate 'noaccess',
140 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
141 map entry points to the relevant distinguished map. In practice,
142 typically more than half of the addressable memory is represented with
143 the 'undefined' or 'defined' distinguished secondary map, so it gives a
144 good saving. It also lets us set the V+A bits of large address regions
145 quickly in set_address_range_perms().
147 On 64-bit machines it's more complicated. If we followed the same basic
148 scheme we'd have a four-level table which would require too many memory
149 accesses. So instead the top-level map table has 2^21 entries (indexed
150 using bits 16..36 of the address); this covers the bottom 128GB. Any
151 accesses above 128GB are handled with a slow, sparse auxiliary table.
152 Valgrind's address space manager tries very hard to keep things below
153 this 128GB barrier so that performance doesn't suffer too much.
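   (Concretely: 2^21 entries, each covering 64kB, span 2^21 * 2^16 == 2^37
   bytes == 128GB, and the primary map itself is 2^21 pointers == 16MB on a
   64-bit host.)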
155 Note that this file has a lot of different functions for reading and
156 writing shadow memory. Only a couple are strictly necessary (eg.
157 get_vabits2 and set_vabits2), most are just specialised for specific
158 common cases to improve performance.
160 Aside: the V+A bits are less precise than they could be -- we have no way
161 of marking memory as read-only. It would be great if we could add an
162 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
163 which requires 2.3 bits to hold, and there's no way to do that elegantly
164 -- we'd have to double up to 4 bits of metadata per byte, which doesn't
165 seem worth it.
168 /* --------------- Basic configuration --------------- */
170 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. */
172 #if VG_WORDSIZE == 4
174 /* cover the entire address space */
175 # define N_PRIMARY_BITS 16
177 #else
179 /* Just handle the first 128G fast and the rest via auxiliary
180 primaries. If you change this, Memcheck will assert at startup.
181 See the definition of UNALIGNED_OR_HIGH for extensive comments. */
182 # define N_PRIMARY_BITS 21
184 #endif
187 /* Do not change this. */
188 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS)
190 /* Do not change this. */
191 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
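/* For example: on a 64-bit target N_PRIMARY_BITS is 21, so N_PRIMARY_MAP is
   0x200000 and MAX_PRIMARY_ADDRESS is 0x1FFFFFFFFF (128GB - 1); on a 32-bit
   target the corresponding values are 0x10000 and 0xFFFFFFFF. */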
194 /* --------------- Secondary maps --------------- */
196 // Each byte of memory conceptually has an A bit, which indicates its
197 // addressability, and 8 V bits, which indicate its definedness.
199 // But because very few bytes are partially defined, we can use a nice
200 // compression scheme to reduce the size of shadow memory. Each byte of
201 // memory has 2 bits which indicate its state (ie. V+A bits):
203 // 00: noaccess (unaddressable but treated as fully defined)
204 // 01: undefined (addressable and fully undefined)
205 // 10: defined (addressable and fully defined)
206 // 11: partdefined (addressable and partially defined)
208 // In the "partdefined" case, we use a secondary table to store the V bits.
209 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
210 // bits.
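// For example, a byte whose low four bits are defined but whose high four
// bits are not is marked "partdefined" here, and its full V bit pattern
// (0xf0, since a V bit of 1 means undefined) lives in that secondary table.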
212 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
213 // four bytes (32 bits) of memory are in each chunk. Hence the name
214 // "vabits8". This lets us get the V+A bits for four bytes at a time
215 // easily (without having to do any shifting and/or masking), and that is a
216 // very common operation. (Note that although each vabits8 chunk
217 // is 8 bits in size, it represents 32 bits of memory.)
219 // The representation is "inverse" little-endian... each 4 bytes of
220 // memory is represented by a 1 byte value, where:
222 // - the status of byte (a+0) is held in bits [1..0]
223 // - the status of byte (a+1) is held in bits [3..2]
224 // - the status of byte (a+2) is held in bits [5..4]
225 // - the status of byte (a+3) is held in bits [7..6]
227 // It's "inverse" because endianness normally describes a mapping from
228 // value bits to memory addresses; in this case the mapping is inverted.
229 // Ie. instead of particular value bits being held in certain addresses, in
230 // this case certain addresses are represented by particular value bits.
231 // See insert_vabits2_into_vabits8() for an example.
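// For example, if byte (a+0) is defined, (a+1) undefined, (a+2) noaccess and
// (a+3) partdefined, the vabits8 value is 11_00_01_10b == 0xc6, using the
// VA_BITS2_* encodings defined below.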
233 // But note that we don't compress the V bits stored in registers; they
234 // need to be explicit to make the shadow operations possible. Therefore
235 // when moving values between registers and memory we need to convert
236 // between the expanded in-register format and the compressed in-memory
237 // format. This isn't so difficult, it just requires careful attention in a
238 // few places.
240 // These represent eight bits of memory.
241 #define VA_BITS2_NOACCESS 0x0 // 00b
242 #define VA_BITS2_UNDEFINED 0x1 // 01b
243 #define VA_BITS2_DEFINED 0x2 // 10b
244 #define VA_BITS2_PARTDEFINED 0x3 // 11b
246 // These represent 16 bits of memory.
247 #define VA_BITS4_NOACCESS 0x0 // 00_00b
248 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
249 #define VA_BITS4_DEFINED 0xa // 10_10b
251 // These represent 32 bits of memory.
252 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
253 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
254 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
256 // These represent 64 bits of memory.
257 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
258 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
259 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
261 // These represent 128 bits of memory.
262 #define VA_BITS32_UNDEFINED 0x55555555 // 01_01_01_01b x 4
265 #define SM_CHUNKS 16384 // Each SM covers 64k of memory.
266 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
267 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3)
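// A quick numeric illustration of the two macros above (compiled out, so it
// is a sketch rather than part of the tool's logic): for a == 0x1234567b the
// byte's 2-bit state sits at bit offset ((a & 3) << 1) == 6 of its vabits8.
#if 0
STATIC_ASSERT( SM_OFF(0x1234567bUL)    == 0x159e );
STATIC_ASSERT( SM_OFF_16(0x1234567bUL) == 0xacf );
#endif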
269 // Paranoia: it's critical for performance that the requested inlining
270 // occurs. So try extra hard.
271 #define INLINE inline __attribute__((always_inline))
273 static INLINE Addr start_of_this_sm ( Addr a ) {
274 return (a & (~SM_MASK));
276 static INLINE Bool is_start_of_sm ( Addr a ) {
277 return (start_of_this_sm(a) == a);
280 STATIC_ASSERT(SM_CHUNKS % 2 == 0);
282 typedef
283 union {
284 UChar vabits8[SM_CHUNKS];
285 UShort vabits16[SM_CHUNKS/2];
287 SecMap;
289 // 3 distinguished secondary maps, one for no-access, one for
290 // accessible but undefined, and one for accessible and defined.
291 // Distinguished secondaries may never be modified.
292 #define SM_DIST_NOACCESS 0
293 #define SM_DIST_UNDEFINED 1
294 #define SM_DIST_DEFINED 2
296 static SecMap sm_distinguished[3];
298 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
299 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
302 // Forward declaration
303 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
305 /* dist_sm points to one of our three distinguished secondaries. Make
306 a copy of it so that we can write to it.
308 static SecMap* copy_for_writing ( SecMap* dist_sm )
310 SecMap* new_sm;
311 tl_assert(dist_sm == &sm_distinguished[0]
312 || dist_sm == &sm_distinguished[1]
313 || dist_sm == &sm_distinguished[2]);
315 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
316 if (new_sm == NULL)
317 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
318 sizeof(SecMap) );
319 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
320 update_SM_counts(dist_sm, new_sm);
321 return new_sm;
324 /* --------------- Stats --------------- */
326 static Int n_issued_SMs = 0;
327 static Int n_deissued_SMs = 0;
328 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs
329 static Int n_undefined_SMs = 0;
330 static Int n_defined_SMs = 0;
331 static Int n_non_DSM_SMs = 0;
332 static Int max_noaccess_SMs = 0;
333 static Int max_undefined_SMs = 0;
334 static Int max_defined_SMs = 0;
335 static Int max_non_DSM_SMs = 0;
337 /* # searches initiated in auxmap_L1, and # base cmps required */
338 static ULong n_auxmap_L1_searches = 0;
339 static ULong n_auxmap_L1_cmps = 0;
340 /* # of searches that missed in auxmap_L1 and therefore had to
341 be handed to auxmap_L2. And the number of nodes inserted. */
342 static ULong n_auxmap_L2_searches = 0;
343 static ULong n_auxmap_L2_nodes = 0;
345 static Int n_sanity_cheap = 0;
346 static Int n_sanity_expensive = 0;
348 static Int n_secVBit_nodes = 0;
349 static Int max_secVBit_nodes = 0;
351 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
353 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
354 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
355 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --;
356 else { n_non_DSM_SMs --;
357 n_deissued_SMs ++; }
359 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
360 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
361 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++;
362 else { n_non_DSM_SMs ++;
363 n_issued_SMs ++; }
365 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs;
366 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
367 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs;
368 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs;
371 /* --------------- Primary maps --------------- */
373 /* The main primary map. This covers some initial part of the address
374 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
375 handled using the auxiliary primary map.
377 static SecMap* primary_map[N_PRIMARY_MAP];
380 /* An entry in the auxiliary primary map. base must be a 64k-aligned
381 value, and sm points at the relevant secondary map. As with the
382 main primary map, the secondary may be either a real secondary, or
383 one of the three distinguished secondaries. DO NOT CHANGE THIS
384 LAYOUT: the first word has to be the key for OSet fast lookups.
386 typedef
387 struct {
388 Addr base;
389 SecMap* sm;
391 AuxMapEnt;
393 /* Tunable parameter: How big is the L1 queue? */
394 #define N_AUXMAP_L1 24
396 /* Tunable parameter: How far along the L1 queue to insert
397 entries resulting from L2 lookups? */
398 #define AUXMAP_L1_INSERT_IX 12
400 static struct {
401 Addr base;
402 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
404 auxmap_L1[N_AUXMAP_L1];
406 static OSet* auxmap_L2 = NULL;
408 static void init_auxmap_L1_L2 ( void )
410 Int i;
411 for (i = 0; i < N_AUXMAP_L1; i++) {
412 auxmap_L1[i].base = 0;
413 auxmap_L1[i].ent = NULL;
416 tl_assert(0 == offsetof(AuxMapEnt,base));
417 tl_assert(sizeof(Addr) == sizeof(void*));
418 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
419 /*fastCmp*/ NULL,
420 VG_(malloc), "mc.iaLL.1", VG_(free) );
423 /* Check representation invariants; if OK return NULL; else a
424 descriptive bit of text. Also return the number of
425 non-distinguished secondary maps referred to from the auxiliary
426 primary maps. */
428 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
430 Word i, j;
431 /* On a 32-bit platform, the L2 and L1 tables should
432 both remain empty forever.
434 On a 64-bit platform:
435 In the L2 table:
436 all .base & 0xFFFF == 0
437 all .base > MAX_PRIMARY_ADDRESS
438 In the L1 table:
439 all .base & 0xFFFF == 0
440 all (.base > MAX_PRIMARY_ADDRESS
441 .base & 0xFFFF == 0
442 and .ent points to an AuxMapEnt with the same .base)
443 or
444 (.base == 0 and .ent == NULL)
446 *n_secmaps_found = 0;
447 if (sizeof(void*) == 4) {
448 /* 32-bit platform */
449 if (VG_(OSetGen_Size)(auxmap_L2) != 0)
450 return "32-bit: auxmap_L2 is non-empty";
451 for (i = 0; i < N_AUXMAP_L1; i++)
452 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
453 return "32-bit: auxmap_L1 is non-empty";
454 } else {
455 /* 64-bit platform */
456 UWord elems_seen = 0;
457 AuxMapEnt *elem, *res;
458 AuxMapEnt key;
459 /* L2 table */
460 VG_(OSetGen_ResetIter)(auxmap_L2);
461 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
462 elems_seen++;
463 if (0 != (elem->base & (Addr)0xFFFF))
464 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
465 if (elem->base <= MAX_PRIMARY_ADDRESS)
466 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
467 if (elem->sm == NULL)
468 return "64-bit: .sm in _L2 is NULL";
469 if (!is_distinguished_sm(elem->sm))
470 (*n_secmaps_found)++;
472 if (elems_seen != n_auxmap_L2_nodes)
473 return "64-bit: disagreement on number of elems in _L2";
474 /* Check L1-L2 correspondence */
475 for (i = 0; i < N_AUXMAP_L1; i++) {
476 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
477 continue;
478 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
479 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
480 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
481 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
482 if (auxmap_L1[i].ent == NULL)
483 return "64-bit: .ent is NULL in auxmap_L1";
484 if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
485 return "64-bit: _L1 and _L2 bases are inconsistent";
486 /* Look it up in auxmap_L2. */
487 key.base = auxmap_L1[i].base;
488 key.sm = 0;
489 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
490 if (res == NULL)
491 return "64-bit: _L1 .base not found in _L2";
492 if (res != auxmap_L1[i].ent)
493 return "64-bit: _L1 .ent disagrees with _L2 entry";
495 /* Check L1 contains no duplicates */
496 for (i = 0; i < N_AUXMAP_L1; i++) {
497 if (auxmap_L1[i].base == 0)
498 continue;
499 for (j = i+1; j < N_AUXMAP_L1; j++) {
500 if (auxmap_L1[j].base == 0)
501 continue;
502 if (auxmap_L1[j].base == auxmap_L1[i].base)
503 return "64-bit: duplicate _L1 .base entries";
507 return NULL; /* ok */
510 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
512 Word i;
513 tl_assert(ent);
514 tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
515 for (i = N_AUXMAP_L1-1; i > rank; i--)
516 auxmap_L1[i] = auxmap_L1[i-1];
517 auxmap_L1[rank].base = ent->base;
518 auxmap_L1[rank].ent = ent;
521 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
523 AuxMapEnt key;
524 AuxMapEnt* res;
525 Word i;
527 tl_assert(a > MAX_PRIMARY_ADDRESS);
528 a &= ~(Addr)0xFFFF;
530 /* First search the front-cache, which is a self-organising
531 list containing the most popular entries. */
533 if (LIKELY(auxmap_L1[0].base == a))
534 return auxmap_L1[0].ent;
535 if (LIKELY(auxmap_L1[1].base == a)) {
536 Addr t_base = auxmap_L1[0].base;
537 AuxMapEnt* t_ent = auxmap_L1[0].ent;
538 auxmap_L1[0].base = auxmap_L1[1].base;
539 auxmap_L1[0].ent = auxmap_L1[1].ent;
540 auxmap_L1[1].base = t_base;
541 auxmap_L1[1].ent = t_ent;
542 return auxmap_L1[0].ent;
545 n_auxmap_L1_searches++;
547 for (i = 0; i < N_AUXMAP_L1; i++) {
548 if (auxmap_L1[i].base == a) {
549 break;
552 tl_assert(i >= 0 && i <= N_AUXMAP_L1);
554 n_auxmap_L1_cmps += (ULong)(i+1);
556 if (i < N_AUXMAP_L1) {
557 if (i > 0) {
558 Addr t_base = auxmap_L1[i-1].base;
559 AuxMapEnt* t_ent = auxmap_L1[i-1].ent;
560 auxmap_L1[i-1].base = auxmap_L1[i-0].base;
561 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent;
562 auxmap_L1[i-0].base = t_base;
563 auxmap_L1[i-0].ent = t_ent;
564 i--;
566 return auxmap_L1[i].ent;
569 n_auxmap_L2_searches++;
571 /* First see if we already have it. */
572 key.base = a;
573 key.sm = 0;
575 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
576 if (res)
577 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
578 return res;
581 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
583 AuxMapEnt *nyu, *res;
585 /* First see if we already have it. */
586 res = maybe_find_in_auxmap( a );
587 if (LIKELY(res))
588 return res;
590 /* Ok, there's no entry in the secondary map, so we'll have
591 to allocate one. */
592 a &= ~(Addr)0xFFFF;
594 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
595 nyu->base = a;
596 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS];
597 VG_(OSetGen_Insert)( auxmap_L2, nyu );
598 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
599 n_auxmap_L2_nodes++;
600 return nyu;
603 /* --------------- SecMap fundamentals --------------- */
605 // In all these, 'low' means it's definitely in the main primary map,
606 // 'high' means it's definitely in the auxiliary table.
608 static INLINE UWord get_primary_map_low_offset ( Addr a )
610 UWord pm_off = a >> 16;
611 return pm_off;
614 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
616 UWord pm_off = a >> 16;
617 # if VG_DEBUG_MEMORY >= 1
618 tl_assert(pm_off < N_PRIMARY_MAP);
619 # endif
620 return &primary_map[ pm_off ];
623 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
625 AuxMapEnt* am = find_or_alloc_in_auxmap(a);
626 return &am->sm;
629 static INLINE SecMap** get_secmap_ptr ( Addr a )
631 return ( a <= MAX_PRIMARY_ADDRESS
632 ? get_secmap_low_ptr(a)
633 : get_secmap_high_ptr(a));
636 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
638 return *get_secmap_low_ptr(a);
641 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
643 return *get_secmap_high_ptr(a);
646 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
648 SecMap** p = get_secmap_low_ptr(a);
649 if (UNLIKELY(is_distinguished_sm(*p)))
650 *p = copy_for_writing(*p);
651 return *p;
654 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
656 SecMap** p = get_secmap_high_ptr(a);
657 if (UNLIKELY(is_distinguished_sm(*p)))
658 *p = copy_for_writing(*p);
659 return *p;
662 /* Produce the secmap for 'a', either from the primary map or by
663 ensuring there is an entry for it in the aux primary map. The
664 secmap may be a distinguished one as the caller will only want to
665 be able to read it.
667 static INLINE SecMap* get_secmap_for_reading ( Addr a )
669 return ( a <= MAX_PRIMARY_ADDRESS
670 ? get_secmap_for_reading_low (a)
671 : get_secmap_for_reading_high(a) );
674 /* Produce the secmap for 'a', either from the primary map or by
675 ensuring there is an entry for it in the aux primary map. The
676 secmap may not be a distinguished one, since the caller will want
677 to be able to write it. If it is a distinguished secondary, make a
678 writable copy of it, install it, and return the copy instead. (COW
679 semantics).
681 static INLINE SecMap* get_secmap_for_writing ( Addr a )
683 return ( a <= MAX_PRIMARY_ADDRESS
684 ? get_secmap_for_writing_low (a)
685 : get_secmap_for_writing_high(a) );
688 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
689 allocate one if one doesn't already exist. This is used by the
690 leak checker.
692 static SecMap* maybe_get_secmap_for ( Addr a )
694 if (a <= MAX_PRIMARY_ADDRESS) {
695 return get_secmap_for_reading_low(a);
696 } else {
697 AuxMapEnt* am = maybe_find_in_auxmap(a);
698 return am ? am->sm : NULL;
702 /* --------------- Fundamental functions --------------- */
704 static INLINE
705 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
707 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
708 *vabits8 &= ~(0x3 << shift); // mask out the two old bits
709 *vabits8 |= (vabits2 << shift); // mask in the two new bits
712 static INLINE
713 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
715 UInt shift;
716 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
717 shift = (a & 2) << 1; // shift by 0 or 4
718 *vabits8 &= ~(0xf << shift); // mask out the four old bits
719 *vabits8 |= (vabits4 << shift); // mask in the four new bits
722 static INLINE
723 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
725 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
726 vabits8 >>= shift; // shift the two bits to the bottom
727 return 0x3 & vabits8; // mask out the rest
730 static INLINE
731 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
733 UInt shift;
734 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
735 shift = (a & 2) << 1; // shift by 0 or 4
736 vabits8 >>= shift; // shift the four bits to the bottom
737 return 0xf & vabits8; // mask out the rest
740 // Note that these four are only used in slow cases. The fast cases do
741 // clever things like combine the auxmap check (in
742 // get_secmap_{read,writ}able) with alignment checks.
744 // *** WARNING! ***
745 // Any time this function is called, if it is possible that vabits2
746 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
747 // sec-V-bits table must also be set!
748 static INLINE
749 void set_vabits2 ( Addr a, UChar vabits2 )
751 SecMap* sm = get_secmap_for_writing(a);
752 UWord sm_off = SM_OFF(a);
753 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
756 static INLINE
757 UChar get_vabits2 ( Addr a )
759 SecMap* sm = get_secmap_for_reading(a);
760 UWord sm_off = SM_OFF(a);
761 UChar vabits8 = sm->vabits8[sm_off];
762 return extract_vabits2_from_vabits8(a, vabits8);
765 // *** WARNING! ***
766 // Any time this function is called, if it is possible that any of the
767 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
768 // corresponding entry(s) in the sec-V-bits table must also be set!
769 static INLINE
770 UChar get_vabits8_for_aligned_word32 ( Addr a )
772 SecMap* sm = get_secmap_for_reading(a);
773 UWord sm_off = SM_OFF(a);
774 UChar vabits8 = sm->vabits8[sm_off];
775 return vabits8;
778 static INLINE
779 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
781 SecMap* sm = get_secmap_for_writing(a);
782 UWord sm_off = SM_OFF(a);
783 sm->vabits8[sm_off] = vabits8;
787 // Forward declarations
788 static UWord get_sec_vbits8(Addr a);
789 static void set_sec_vbits8(Addr a, UWord vbits8);
791 // Returns False if there was an addressability error.
792 static INLINE
793 Bool set_vbits8 ( Addr a, UChar vbits8 )
795 Bool ok = True;
796 UChar vabits2 = get_vabits2(a);
797 if ( VA_BITS2_NOACCESS != vabits2 ) {
798 // Addressable. Convert in-register format to in-memory format.
799 // Also remove any existing sec V bit entry for the byte if no
800 // longer necessary.
801 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; }
802 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
803 else { vabits2 = VA_BITS2_PARTDEFINED;
804 set_sec_vbits8(a, vbits8); }
805 set_vabits2(a, vabits2);
807 } else {
808 // Unaddressable! Do nothing -- when writing to unaddressable
809 // memory it acts as a black hole, and the V bits can never be seen
810 // again. So we don't have to write them at all.
811 ok = False;
813 return ok;
816 // Returns False if there was an addressability error. In that case, we put
817 // all defined bits into vbits8.
818 static INLINE
819 Bool get_vbits8 ( Addr a, UChar* vbits8 )
821 Bool ok = True;
822 UChar vabits2 = get_vabits2(a);
824 // Convert the in-memory format to in-register format.
825 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; }
826 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
827 else if ( VA_BITS2_NOACCESS == vabits2 ) {
828 *vbits8 = V_BITS8_DEFINED; // Make V bits defined!
829 ok = False;
830 } else {
831 tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
832 *vbits8 = get_sec_vbits8(a);
834 return ok;
838 /* --------------- Secondary V bit table ------------ */
840 // This table holds the full V bit pattern for partially-defined bytes
841 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
842 // memory.
844 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
845 // then overwrite the same address with a fully defined byte, the sec-V-bit
846 // node will not necessarily be removed. This is because checking for
847 // whether removal is necessary would slow down the fast paths.
849 // To avoid the stale nodes building up too much, we periodically (once the
850 // table reaches a certain size) garbage collect (GC) the table by
851 // traversing it and evicting any nodes not having a PDB.
852 // If more than a certain proportion of nodes survived, we increase the
853 // table size so that GCs occur less often.
855 // This policy is designed to avoid bad table bloat in the worst case where
856 // a program creates huge numbers of stale PDBs -- we would get this bloat
857 // if we had no GC -- while handling well the case where a node becomes
858 // stale but shortly afterwards is rewritten with a PDB and so becomes
859 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
860 // remove all stale nodes as soon as possible, we just end up re-adding a
861 // lot of them in later again. The "sufficiently stale" approach avoids
862 // this. (If a program has many live PDBs, performance will just suck,
863 // there's no way around that.)
865 // Further comments, JRS 14 Feb 2012. It turns out that the policy of
866 // holding on to stale entries for 2 GCs before discarding them can lead
867 // to massive space leaks. So we're changing to an arrangement where
868 // lines are evicted as soon as they are observed to be stale during a
869 // GC. This also has a side benefit of allowing the sufficiently_stale
870 // field to be removed from the SecVBitNode struct, reducing its size by
871 // 8 bytes, which is a substantial space saving considering that the
872 // struct was previously 32 or so bytes, on a 64 bit target.
874 // In order to try and mitigate the problem that the "sufficiently stale"
875 // heuristic was designed to avoid, the table size is allowed to drift
876 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This
877 // means that nodes will exist in the table longer on average, and hopefully
878 // will be deleted and re-added less frequently.
880 // The previous scaling up mechanism (now called STEPUP) is retained:
881 // if residency exceeds 50%, the table is scaled up, although by a
882 // factor sqrt(2) rather than 2 as before. This effectively doubles the
883 // frequency of GCs when there are many PDBs and reduces the tendency of
884 // stale PDBs to reside for long periods in the table.
886 static OSet* secVBitTable;
888 // Stats
889 static ULong sec_vbits_new_nodes = 0;
890 static ULong sec_vbits_updates = 0;
892 // This must be a power of two; this is checked in mc_pre_clo_init().
893 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
894 // a larger address range) they take more space but we can get multiple
895 // partially-defined bytes in one if they are close to each other, reducing
896 // the number of total nodes. In practice sometimes they are clustered (eg.
897 // perf/bz2 repeatedly writes then reads more than 20,000 of them in a contiguous
898 // row), but often not. So we choose something intermediate.
899 #define BYTES_PER_SEC_VBIT_NODE 16
901 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
902 // more than this many nodes survive a GC.
903 #define STEPUP_SURVIVOR_PROPORTION 0.5
904 #define STEPUP_GROWTH_FACTOR 1.414213562
906 // If the above heuristic doesn't apply, then we may make the table
907 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
908 // this many nodes survive a GC, _and_ the total table size does
909 // not exceed a fixed limit. The numbers are somewhat arbitrary, but
910 // work tolerably well on long Firefox runs. The scaleup ratio of 1.5%
911 // effectively although gradually reduces residency and increases time
912 // between GCs for programs with small numbers of PDBs. The 80000 limit
913 // effectively limits the table size to around 2MB for programs with
914 // small numbers of PDBs, whilst giving a reasonably long lifetime to
915 // entries, to try and reduce the costs resulting from deleting and
916 // re-adding of entries.
917 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
918 #define DRIFTUP_GROWTH_FACTOR 1.015
919 #define DRIFTUP_MAX_SIZE 80000
921 // We GC the table when it gets this many nodes in it, ie. it's effectively
922 // the table size. It can change.
923 static Int secVBitLimit = 1000;
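// For example: starting from the initial secVBitLimit of 1000, a GC in which
// more than 500 nodes survive (STEPUP) raises the limit to about 1414
// (x 1.414...), whereas a GC in which, say, 200 nodes survive (DRIFTUP, and
// only while the limit is below 80000) raises it to 1015 (x 1.015).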
925 // The number of GCs done, used to age sec-V-bit nodes for eviction.
926 // Because it's unsigned, wrapping doesn't matter -- the right answer will
927 // come out anyway.
928 static UInt GCs_done = 0;
930 typedef
931 struct {
932 Addr a;
933 UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
935 SecVBitNode;
937 static OSet* createSecVBitTable(void)
939 OSet* newSecVBitTable;
940 newSecVBitTable = VG_(OSetGen_Create_With_Pool)
941 ( offsetof(SecVBitNode, a),
942 NULL, // use fast comparisons
943 VG_(malloc), "mc.cSVT.1 (sec VBit table)",
944 VG_(free),
945 1000,
946 sizeof(SecVBitNode));
947 return newSecVBitTable;
950 static void gcSecVBitTable(void)
952 OSet* secVBitTable2;
953 SecVBitNode* n;
954 Int i, n_nodes = 0, n_survivors = 0;
956 GCs_done++;
958 // Create the new table.
959 secVBitTable2 = createSecVBitTable();
961 // Traverse the table, moving fresh nodes into the new table.
962 VG_(OSetGen_ResetIter)(secVBitTable);
963 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
964 // Keep node if any of its bytes are non-stale. Using
965 // get_vabits2() for the lookup is not very efficient, but I don't
966 // think it matters.
967 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
968 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
969 // Found a non-stale byte, so keep =>
970 // Insert a copy of the node into the new table.
971 SecVBitNode* n2 =
972 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
973 *n2 = *n;
974 VG_(OSetGen_Insert)(secVBitTable2, n2);
975 break;
980 // Get the before and after sizes.
981 n_nodes = VG_(OSetGen_Size)(secVBitTable);
982 n_survivors = VG_(OSetGen_Size)(secVBitTable2);
984 // Destroy the old table, and put the new one in its place.
985 VG_(OSetGen_Destroy)(secVBitTable);
986 secVBitTable = secVBitTable2;
988 if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
989 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
990 n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
993 // Increase table size if necessary.
994 if ((Double)n_survivors
995 > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
996 secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
997 if (VG_(clo_verbosity) > 1)
998 VG_(message)(Vg_DebugMsg,
999 "memcheck GC: %d new table size (stepup)\n",
1000 secVBitLimit);
1002 else
1003 if (secVBitLimit < DRIFTUP_MAX_SIZE
1004 && (Double)n_survivors
1005 > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
1006 secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
1007 if (VG_(clo_verbosity) > 1)
1008 VG_(message)(Vg_DebugMsg,
1009 "memcheck GC: %d new table size (driftup)\n",
1010 secVBitLimit);
1014 static UWord get_sec_vbits8(Addr a)
1016 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1017 Int amod = a % BYTES_PER_SEC_VBIT_NODE;
1018 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1019 UChar vbits8;
1020 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1021 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1022 // make it to the secondary V bits table.
1023 vbits8 = n->vbits8[amod];
1024 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1025 return vbits8;
1028 static void set_sec_vbits8(Addr a, UWord vbits8)
1030 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1031 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE;
1032 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1033 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1034 // make it to the secondary V bits table.
1035 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1036 if (n) {
1037 n->vbits8[amod] = vbits8; // update
1038 sec_vbits_updates++;
1039 } else {
1040 // Do a table GC if necessary. Nb: do this before creating and
1041 // inserting the new node, to avoid erroneously GC'ing the new node.
1042 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1043 gcSecVBitTable();
1046 // New node: assign the specific byte, make the rest invalid (they
1047 // should never be read as-is, but be cautious).
1048 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1049 n->a = aAligned;
1050 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1051 n->vbits8[i] = V_BITS8_UNDEFINED;
1053 n->vbits8[amod] = vbits8;
1055 // Insert the new node.
1056 VG_(OSetGen_Insert)(secVBitTable, n);
1057 sec_vbits_new_nodes++;
1059 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1060 if (n_secVBit_nodes > max_secVBit_nodes)
1061 max_secVBit_nodes = n_secVBit_nodes;
1065 /* --------------- Endianness helpers --------------- */
1067 /* Returns the offset in memory of the byteno-th least significant byte
1068 in a wordszB-sized word, given the specified endianness. */
1069 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1070 UWord byteno ) {
1071 return bigendian ? (wordszB-1-byteno) : byteno;
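/* For example, with wordszB == 8: byteno 0 (the least significant byte) maps
   to offset 0 on a little-endian target and to offset 7 on a big-endian one,
   while byteno 7 (the most significant byte) maps to 7 and 0 respectively. */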
1075 /* --------------- Ignored address ranges --------------- */
1077 /* Denotes the address-error-reportability status for address ranges:
1078 IAR_NotIgnored: the usual case -- report errors in this range
1079 IAR_CommandLine: don't report errors -- from command line setting
1080 IAR_ClientReq: don't report errors -- from client request
1082 typedef
1083 enum { IAR_INVALID=99,
1084 IAR_NotIgnored,
1085 IAR_CommandLine,
1086 IAR_ClientReq }
1087 IARKind;
1089 static const HChar* showIARKind ( IARKind iark )
1091 switch (iark) {
1092 case IAR_INVALID: return "INVALID";
1093 case IAR_NotIgnored: return "NotIgnored";
1094 case IAR_CommandLine: return "CommandLine";
1095 case IAR_ClientReq: return "ClientReq";
1096 default: return "???";
1100 // RangeMap<IARKind>
1101 static RangeMap* gIgnoredAddressRanges = NULL;
1103 static void init_gIgnoredAddressRanges ( void )
1105 if (LIKELY(gIgnoredAddressRanges != NULL))
1106 return;
1107 gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
1108 VG_(free), IAR_NotIgnored );
1111 Bool MC_(in_ignored_range) ( Addr a )
1113 if (LIKELY(gIgnoredAddressRanges == NULL))
1114 return False;
1115 UWord how = IAR_INVALID;
1116 UWord key_min = ~(UWord)0;
1117 UWord key_max = (UWord)0;
1118 VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
1119 tl_assert(key_min <= a && a <= key_max);
1120 switch (how) {
1121 case IAR_NotIgnored: return False;
1122 case IAR_CommandLine: return True;
1123 case IAR_ClientReq: return True;
1124 default: break; /* invalid */
1126 VG_(tool_panic)("MC_(in_ignored_range)");
1127 /*NOTREACHED*/
1130 Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB )
1132 if (LIKELY(!MC_(clo_ignore_range_below_sp)))
1133 return False;
1134 tl_assert(szB >= 1 && szB <= 32);
1135 tl_assert(MC_(clo_ignore_range_below_sp__first_offset)
1136 > MC_(clo_ignore_range_below_sp__last_offset));
1137 Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset);
1138 Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset);
1139 if (range_lo >= range_hi) {
1140 /* Bizarre. We have a wraparound situation. What should we do? */
1141 return False; // Play safe
1142 } else {
1143 /* This is the expected case. */
1144 if (range_lo <= a && a + szB - 1 <= range_hi)
1145 return True;
1146 else
1147 return False;
1149 /*NOTREACHED*/
1150 tl_assert(0);
1153 /* Parse two Addrs (in hex) separated by a dash, or fail. */
1155 static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 )
1157 Bool ok = VG_(parse_Addr) (ppc, result1);
1158 if (!ok)
1159 return False;
1160 if (**ppc != '-')
1161 return False;
1162 (*ppc)++;
1163 ok = VG_(parse_Addr) (ppc, result2);
1164 if (!ok)
1165 return False;
1166 return True;
1169 /* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
1170 or fail. */
1172 static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 )
1174 Bool ok = VG_(parse_UInt) (ppc, result1);
1175 if (!ok)
1176 return False;
1177 if (**ppc != '-')
1178 return False;
1179 (*ppc)++;
1180 ok = VG_(parse_UInt) (ppc, result2);
1181 if (!ok)
1182 return False;
1183 return True;
1186 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1187 fail. If they are valid, add them to the global set of ignored
1188 ranges. */
1189 static Bool parse_ignore_ranges ( const HChar* str0 )
1191 init_gIgnoredAddressRanges();
1192 const HChar* str = str0;
1193 const HChar** ppc = &str;
1194 while (1) {
1195 Addr start = ~(Addr)0;
1196 Addr end = (Addr)0;
1197 Bool ok = parse_Addr_pair(ppc, &start, &end);
1198 if (!ok)
1199 return False;
1200 if (start > end)
1201 return False;
1202 VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
1203 if (**ppc == 0)
1204 return True;
1205 if (**ppc != ',')
1206 return False;
1207 (*ppc)++;
1209 /*NOTREACHED*/
1210 return False;
1213 /* Add or remove [start, +len) from the set of ignored ranges. */
1214 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
1216 init_gIgnoredAddressRanges();
1217 const Bool verbose = (VG_(clo_verbosity) > 1);
1218 if (len == 0) {
1219 return False;
1221 if (addRange) {
1222 VG_(bindRangeMap)(gIgnoredAddressRanges,
1223 start, start+len-1, IAR_ClientReq);
1224 if (verbose)
1225 VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
1226 (void*)start, (void*)(start+len-1));
1227 } else {
1228 VG_(bindRangeMap)(gIgnoredAddressRanges,
1229 start, start+len-1, IAR_NotIgnored);
1230 if (verbose)
1231 VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
1232 (void*)start, (void*)(start+len-1));
1234 if (verbose) {
1235 VG_(dmsg)("memcheck: now have %u ranges:\n",
1236 VG_(sizeRangeMap)(gIgnoredAddressRanges));
1237 UInt i;
1238 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
1239 UWord val = IAR_INVALID;
1240 UWord key_min = ~(UWord)0;
1241 UWord key_max = (UWord)0;
1242 VG_(indexRangeMap)( &key_min, &key_max, &val,
1243 gIgnoredAddressRanges, i );
1244 VG_(dmsg)("memcheck: [%u] %016lx-%016lx %s\n",
1245 i, key_min, key_max, showIARKind(val));
1248 return True;
1252 /* --------------- Load/store slow cases. --------------- */
1254 static
1255 __attribute__((noinline))
1256 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
1257 Addr a, SizeT nBits, Bool bigendian )
1259 ULong pessim[4]; /* only used when p-l-ok=yes */
1260 SSizeT szB = nBits / 8;
1261 SSizeT szL = szB / 8; /* Size in Longs (64-bit units) */
1262 SSizeT i, j; /* Must be signed. */
1263 SizeT n_addrs_bad = 0;
1264 Addr ai;
1265 UChar vbits8;
1266 Bool ok;
1268 /* Code below assumes load size is a power of two and at least 64
1269 bits. */
1270 tl_assert((szB & (szB-1)) == 0 && szL > 0);
1272 /* If this triggers, you probably just need to increase the size of
1273 the pessim array. */
1274 tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
1276 for (j = 0; j < szL; j++) {
1277 pessim[j] = V_BITS64_DEFINED;
1278 res[j] = V_BITS64_UNDEFINED;
1281 /* Make up a result V word, which contains the loaded data for
1282 valid addresses and Defined for invalid addresses. Iterate over
1283 the bytes in the word, from the most significant down to the
1284 least. The vbits to return are calculated into res[]. Also
1285 compute the pessimising value to be used when
1286 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1287 info can be gleaned from the pessim array) but is used as a
1288 cross-check. */
1289 for (j = szL-1; j >= 0; j--) {
1290 ULong vbits64 = V_BITS64_UNDEFINED;
1291 ULong pessim64 = V_BITS64_DEFINED;
1292 UWord long_index = byte_offset_w(szL, bigendian, j);
1293 for (i = 8-1; i >= 0; i--) {
1294 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
1295 ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
1296 ok = get_vbits8(ai, &vbits8);
1297 vbits64 <<= 8;
1298 vbits64 |= vbits8;
1299 if (!ok) n_addrs_bad++;
1300 pessim64 <<= 8;
1301 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1303 res[long_index] = vbits64;
1304 pessim[long_index] = pessim64;
1307 /* In the common case, all the addresses involved are valid, so we
1308 just return the computed V bits and have done. */
1309 if (LIKELY(n_addrs_bad == 0))
1310 return;
1312 /* If there's no possibility of getting a partial-loads-ok
1313 exemption, report the error and quit. */
1314 if (!MC_(clo_partial_loads_ok)) {
1315 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1316 return;
1319 /* The partial-loads-ok exemption might apply. Find out if it
1320 does. If so, don't report an addressing error, but do return
1321 Undefined for the bytes that are out of range, so as to avoid
1322 false negatives. If it doesn't apply, just report an addressing
1323 error in the usual way. */
1325 /* Some code steps along byte strings in aligned chunks
1326 even when there is only a partially defined word at the end (eg,
1327 optimised strlen). This is allowed by the memory model of
1328 modern machines, since an aligned load cannot span two pages and
1329 thus cannot "partially fault".
1331 Therefore, a load from a partially-addressible place is allowed
1332 if all of the following hold:
1333 - the command-line flag is set [by default, it isn't]
1334 - it's an aligned load
1335 - at least one of the addresses in the word *is* valid
1337 Since this suppresses the addressing error, we avoid false
1338 negatives by marking bytes undefined when they come from an
1339 invalid address.
1342 /* "at least one of the addresses is invalid" */
1343 ok = False;
1344 for (j = 0; j < szL; j++)
1345 ok |= pessim[j] != V_BITS64_DEFINED;
1346 tl_assert(ok);
1348 if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
1349 /* Exemption applies. Use the previously computed pessimising
1350 value and return the combined result, but don't flag an
1351 addressing error. The pessimising value is Defined for valid
1352 addresses and Undefined for invalid addresses. */
1353 /* for assumption that doing bitwise or implements UifU */
1354 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1355 /* (really need "UifU" here...)
1356 vbits[j] UifU= pessim[j] (is pessimised by it, iow) */
1357 for (j = szL-1; j >= 0; j--)
1358 res[j] |= pessim[j];
1359 return;
1362 /* Exemption doesn't apply. Flag an addressing error in the normal
1363 way. */
1364 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1368 static
1369 __attribute__((noinline))
1370 __attribute__((used))
1371 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
1372 this function may get called from hand written assembly. */
1373 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1375 PROF_EVENT(MCPE_LOADVN_SLOW);
1377 /* ------------ BEGIN semi-fast cases ------------ */
1378 /* These deal quickly-ish with the common auxiliary primary map
1379 cases on 64-bit platforms. Are merely a speedup hack; can be
1380 omitted without loss of correctness/functionality. Note that in
1381 both cases the "sizeof(void*) == 8" causes these cases to be
1382 folded out by compilers on 32-bit platforms. These are derived
1383 from LOADV64 and LOADV32.
1385 if (LIKELY(sizeof(void*) == 8
1386 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1387 SecMap* sm = get_secmap_for_reading(a);
1388 UWord sm_off16 = SM_OFF_16(a);
1389 UWord vabits16 = sm->vabits16[sm_off16];
1390 if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1391 return V_BITS64_DEFINED;
1392 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1393 return V_BITS64_UNDEFINED;
1394 /* else fall into the slow case */
1396 if (LIKELY(sizeof(void*) == 8
1397 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1398 SecMap* sm = get_secmap_for_reading(a);
1399 UWord sm_off = SM_OFF(a);
1400 UWord vabits8 = sm->vabits8[sm_off];
1401 if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1402 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1403 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1404 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1405 /* else fall into slow case */
1407 /* ------------ END semi-fast cases ------------ */
1409 ULong vbits64 = V_BITS64_UNDEFINED; /* result */
1410 ULong pessim64 = V_BITS64_DEFINED; /* only used when p-l-ok=yes */
1411 SSizeT szB = nBits / 8;
1412 SSizeT i; /* Must be signed. */
1413 SizeT n_addrs_bad = 0;
1414 Addr ai;
1415 UChar vbits8;
1416 Bool ok;
1418 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1420 /* Make up a 64-bit result V word, which contains the loaded data
1421 for valid addresses and Defined for invalid addresses. Iterate
1422 over the bytes in the word, from the most significant down to
1423 the least. The vbits to return are calculated into vbits64.
1424 Also compute the pessimising value to be used when
1425 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1426 info can be gleaned from pessim64) but is used as a
1427 cross-check. */
1428 for (i = szB-1; i >= 0; i--) {
1429 PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
1430 ai = a + byte_offset_w(szB, bigendian, i);
1431 ok = get_vbits8(ai, &vbits8);
1432 vbits64 <<= 8;
1433 vbits64 |= vbits8;
1434 if (!ok) n_addrs_bad++;
1435 pessim64 <<= 8;
1436 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1439 /* In the common case, all the addresses involved are valid, so we
1440 just return the computed V bits and have done. */
1441 if (LIKELY(n_addrs_bad == 0))
1442 return vbits64;
1444 /* If there's no possibility of getting a partial-loads-ok
1445 exemption, report the error and quit. */
1446 if (!MC_(clo_partial_loads_ok)) {
1447 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1448 return vbits64;
1451 /* The partial-loads-ok exemption might apply. Find out if it
1452 does. If so, don't report an addressing error, but do return
1453 Undefined for the bytes that are out of range, so as to avoid
1454 false negatives. If it doesn't apply, just report an addressing
1455 error in the usual way. */
1457 /* Some code steps along byte strings in aligned word-sized chunks
1458 even when there is only a partially defined word at the end (eg,
1459 optimised strlen). This is allowed by the memory model of
1460 modern machines, since an aligned load cannot span two pages and
1461 thus cannot "partially fault", despite such behaviour being
1462 declared undefined by ANSI C/C++.
1464 Therefore, a load from a partially-addressible place is allowed
1465 if all of the following hold:
1466 - the command-line flag is set [by default, it isn't]
1467 - it's a word-sized, word-aligned load
1468 - at least one of the addresses in the word *is* valid
1470 Since this suppresses the addressing error, we avoid false
1471 negatives by marking bytes undefined when they come from an
1472 invalid address.
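   For example (with the flag set), an aligned word-sized load whose final
   byte is unaddressable returns the loaded V bits for the valid bytes
   unchanged, forces the V bits of the inaccessible byte to Undefined, and
   reports no addressing error.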
1475 /* "at least one of the addresses is invalid" */
1476 tl_assert(pessim64 != V_BITS64_DEFINED);
1478 if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1479 && n_addrs_bad < VG_WORDSIZE) {
1480 /* Exemption applies. Use the previously computed pessimising
1481 value for vbits64 and return the combined result, but don't
1482 flag an addressing error. The pessimising value is Defined
1483 for valid addresses and Undefined for invalid addresses. */
1484 /* for assumption that doing bitwise or implements UifU */
1485 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1486 /* (really need "UifU" here...)
1487 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1488 vbits64 |= pessim64;
1489 return vbits64;
1492 /* Also, it appears that gcc generates string-stepping code in
1493 32-bit chunks on 64 bit platforms. So, also grant an exception
1494 for this case. Note that the first clause of the conditional
1495 (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1496 will get folded out in 32 bit builds. */
1497 if (VG_WORDSIZE == 8
1498 && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
1499 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1500 /* (really need "UifU" here...)
1501 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1502 vbits64 |= pessim64;
1503 /* Mark the upper 32 bits as undefined, just to be on the safe
1504 side. */
1505 vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1506 return vbits64;
1509 /* Exemption doesn't apply. Flag an addressing error in the normal
1510 way. */
1511 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1513 return vbits64;
1517 static
1518 __attribute__((noinline))
1519 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1521 SizeT szB = nBits / 8;
1522 SizeT i, n_addrs_bad = 0;
1523 UChar vbits8;
1524 Addr ai;
1525 Bool ok;
1527 PROF_EVENT(MCPE_STOREVN_SLOW);
1529 /* ------------ BEGIN semi-fast cases ------------ */
1530 /* These deal quickly-ish with the common auxiliary primary map
1531 cases on 64-bit platforms. Are merely a speedup hack; can be
1532 omitted without loss of correctness/functionality. Note that in
1533 both cases the "sizeof(void*) == 8" causes these cases to be
1534 folded out by compilers on 32-bit platforms. The logic below
1535 is somewhat similar to some cases extensively commented in
1536 MC_(helperc_STOREV8).
1538 if (LIKELY(sizeof(void*) == 8
1539 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1540 SecMap* sm = get_secmap_for_reading(a);
1541 UWord sm_off16 = SM_OFF_16(a);
1542 UWord vabits16 = sm->vabits16[sm_off16];
1543 if (LIKELY( !is_distinguished_sm(sm) &&
1544 (VA_BITS16_DEFINED == vabits16 ||
1545 VA_BITS16_UNDEFINED == vabits16) )) {
1546 /* Handle common case quickly: a is suitably aligned, */
1547 /* is mapped, and is addressible. */
1548 // Convert full V-bits in register to compact 2-bit form.
1549 if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1550 sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
1551 return;
1552 } else if (V_BITS64_UNDEFINED == vbytes) {
1553 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
1554 return;
1556 /* else fall into the slow case */
1558 /* else fall into the slow case */
1560 if (LIKELY(sizeof(void*) == 8
1561 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1562 SecMap* sm = get_secmap_for_reading(a);
1563 UWord sm_off = SM_OFF(a);
1564 UWord vabits8 = sm->vabits8[sm_off];
1565 if (LIKELY( !is_distinguished_sm(sm) &&
1566 (VA_BITS8_DEFINED == vabits8 ||
1567 VA_BITS8_UNDEFINED == vabits8) )) {
1568 /* Handle common case quickly: a is suitably aligned, */
1569 /* is mapped, and is addressible. */
1570 // Convert full V-bits in register to compact 2-bit form.
1571 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1572 sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1573 return;
1574 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1575 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1576 return;
1578 /* else fall into the slow case */
1580 /* else fall into the slow case */
1582 /* ------------ END semi-fast cases ------------ */
1584 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1586 /* Dump vbytes in memory, iterating from least to most significant
1587 byte. At the same time establish addressibility of the location. */
1588 for (i = 0; i < szB; i++) {
1589 PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
1590 ai = a + byte_offset_w(szB, bigendian, i);
1591 vbits8 = vbytes & 0xff;
1592 ok = set_vbits8(ai, vbits8);
1593 if (!ok) n_addrs_bad++;
1594 vbytes >>= 8;
1597 /* If an address error has happened, report it. */
1598 if (n_addrs_bad > 0)
1599 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1603 /*------------------------------------------------------------*/
1604 /*--- Setting permissions over address ranges. ---*/
1605 /*------------------------------------------------------------*/
1607 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1608 UWord dsm_num )
1610 UWord sm_off, sm_off16;
1611 UWord vabits2 = vabits16 & 0x3;
1612 SizeT lenA, lenB, len_to_next_secmap;
1613 Addr aNext;
1614 SecMap* sm;
1615 SecMap** sm_ptr;
1616 SecMap* example_dsm;
1618 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);
1620 /* Check the V+A bits make sense. */
1621 tl_assert(VA_BITS16_NOACCESS == vabits16 ||
1622 VA_BITS16_UNDEFINED == vabits16 ||
1623 VA_BITS16_DEFINED == vabits16);
1625 // This code should never write PDBs; ensure this. (See comment above
1626 // set_vabits2().)
1627 tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1629 if (lenT == 0)
1630 return;
1632 if (lenT > 256 * 1024 * 1024) {
1633 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1634 const HChar* s = "unknown???";
1635 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1636 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1637 if (vabits16 == VA_BITS16_DEFINED ) s = "defined";
1638 VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1639 "large range [0x%lx, 0x%lx) (%s)\n",
1640 a, a + lenT, s);
1644 #ifndef PERF_FAST_SARP
1645 /*------------------ debug-only case ------------------ */
1647 // Endianness doesn't matter here because all bytes are being set to
1648 // the same value.
1649 // Nb: We don't have to worry about updating the sec-V-bits table
1650 // after these set_vabits2() calls because this code never writes
1651 // VA_BITS2_PARTDEFINED values.
1652 SizeT i;
1653 for (i = 0; i < lenT; i++) {
1654 set_vabits2(a + i, vabits2);
1656 return;
1658 #endif
1660 /*------------------ standard handling ------------------ */
1662 /* Get the distinguished secondary that we might want
1663 to use (part of the space-compression scheme). */
1664 example_dsm = &sm_distinguished[dsm_num];
1666 // We have to handle ranges covering various combinations of partial and
1667 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1668 // Cases marked with a '*' are common.
1670 // TYPE PARTS USED
1671 // ---- ----------
1672 // * one partial sec-map (p) 1
1673 // - one whole sec-map (P) 2
1675 // * two partial sec-maps (pp) 1,3
1676 // - one partial, one whole sec-map (pP) 1,2
1677 // - one whole, one partial sec-map (Pp) 2,3
1678 // - two whole sec-maps (PP) 2,2
1680 // * one partial, one whole, one partial (pPp) 1,2,3
1681 // - one partial, two whole (pPP) 1,2,2
1682 // - two whole, one partial (PPp) 2,2,3
1683 // - three whole (PPP) 2,2,2
1685 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1686 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1687 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1688 // - N whole (PP...PP) 2,2...2,2
1690 // Break up total length (lenT) into two parts: length in the first
1691 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
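// A worked example may help (a sketch, assuming SM_SIZE == 0x10000, i.e. the
// 64KB sec-maps that the "64KB steps" loop in Part 2 below operates on):
//   a = 0x5000FFF0, lenT = 0x20020
//   => aNext = 0x50010000, len_to_next_secmap = 0x10
//   => lenA = 0x10 (handled by Part 1), lenB = 0x20010
//   => Part 2 covers two whole sec-maps (0x20000 bytes), and Part 3
//      finishes the remaining 0x10 bytes: the "pP...Pp" (1,2...2,3) case.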
1692 aNext = start_of_this_sm(a) + SM_SIZE;
1693 len_to_next_secmap = aNext - a;
1694 if ( lenT <= len_to_next_secmap ) {
1695 // Range entirely within one sec-map. Covers almost all cases.
1696 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
1697 lenA = lenT;
1698 lenB = 0;
1699 } else if (is_start_of_sm(a)) {
1700 // Range spans at least one whole sec-map, and starts at the beginning
1701 // of a sec-map; skip to Part 2.
1702 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
1703 lenA = 0;
1704 lenB = lenT;
1705 goto part2;
1706 } else {
1707 // Range spans two or more sec-maps, first one is partial.
1708 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
1709 lenA = len_to_next_secmap;
1710 lenB = lenT - lenA;
1713 //------------------------------------------------------------------------
1714 // Part 1: Deal with the first sec-map. Most of the time the range will be
1715 // entirely within a sec-map and this part alone will suffice. Also,
1716 // doing it this way lets us avoid repeatedly testing for the crossing of
1717 // a sec-map boundary within these loops.
1718 //------------------------------------------------------------------------
1720 // If it's distinguished, make it undistinguished if necessary.
1721 sm_ptr = get_secmap_ptr(a);
1722 if (is_distinguished_sm(*sm_ptr)) {
1723 if (*sm_ptr == example_dsm) {
1724 // Sec-map already has the V+A bits that we want, so skip.
1725 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
1726 a = aNext;
1727 lenA = 0;
1728 } else {
1729 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
1730 *sm_ptr = copy_for_writing(*sm_ptr);
1733 sm = *sm_ptr;
1735 // 1 byte steps
1736 while (True) {
1737 if (VG_IS_8_ALIGNED(a)) break;
1738 if (lenA < 1) break;
1739 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
1740 sm_off = SM_OFF(a);
1741 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1742 a += 1;
1743 lenA -= 1;
1745 // 8-aligned, 8 byte steps
1746 while (True) {
1747 if (lenA < 8) break;
1748 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
1749 sm_off16 = SM_OFF_16(a);
1750 sm->vabits16[sm_off16] = vabits16;
1751 a += 8;
1752 lenA -= 8;
1754 // 1 byte steps
1755 while (True) {
1756 if (lenA < 1) break;
1757 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
1758 sm_off = SM_OFF(a);
1759 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1760 a += 1;
1761 lenA -= 1;
1764 // We've finished the first sec-map. Is that it?
1765 if (lenB == 0)
1766 return;
1768 //------------------------------------------------------------------------
1769 // Part 2: Fast-set entire sec-maps at a time.
1770 //------------------------------------------------------------------------
1771 part2:
1772 // 64KB-aligned, 64KB steps.
1773 // Nb: we can reach here with lenB < SM_SIZE
1774 tl_assert(0 == lenA);
1775 while (True) {
1776 if (lenB < SM_SIZE) break;
1777 tl_assert(is_start_of_sm(a));
1778 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
1779 sm_ptr = get_secmap_ptr(a);
1780 if (!is_distinguished_sm(*sm_ptr)) {
1781 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
1782 // Free the non-distinguished sec-map that we're replacing. This
1783 // case happens moderately often, enough to be worthwhile.
1784 SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1785 tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1787 update_SM_counts(*sm_ptr, example_dsm);
1788 // Make the sec-map entry point to the example DSM
1789 *sm_ptr = example_dsm;
1790 lenB -= SM_SIZE;
1791 a += SM_SIZE;
1794 // We've finished the whole sec-maps. Is that it?
1795 if (lenB == 0)
1796 return;
1798 //------------------------------------------------------------------------
1799 // Part 3: Finish off the final partial sec-map, if necessary.
1800 //------------------------------------------------------------------------
1802 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1804 // If it's distinguished, make it undistinguished if necessary.
1805 sm_ptr = get_secmap_ptr(a);
1806 if (is_distinguished_sm(*sm_ptr)) {
1807 if (*sm_ptr == example_dsm) {
1808 // Sec-map already has the V+A bits that we want, so stop.
1809 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
1810 return;
1811 } else {
1812 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
1813 *sm_ptr = copy_for_writing(*sm_ptr);
1816 sm = *sm_ptr;
1818 // 8-aligned, 8 byte steps
1819 while (True) {
1820 if (lenB < 8) break;
1821 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
1822 sm_off16 = SM_OFF_16(a);
1823 sm->vabits16[sm_off16] = vabits16;
1824 a += 8;
1825 lenB -= 8;
1827 // 1 byte steps
1828 while (True) {
1829 if (lenB < 1) return;
1830 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
1831 sm_off = SM_OFF(a);
1832 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1833 a += 1;
1834 lenB -= 1;
1839 /* --- Set permissions for arbitrary address ranges --- */
1841 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1843 PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
1844 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1845 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1846 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1847 ocache_sarp_Clear_Origins ( a, len );
1850 static void make_mem_undefined ( Addr a, SizeT len )
1852 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
1853 DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1854 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1857 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1859 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
1860 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1861 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1862 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1863 ocache_sarp_Set_Origins ( a, len, otag );
1866 static
1867 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1868 ThreadId tid, UInt okind )
1870 UInt ecu;
1871 ExeContext* here;
1872 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1873 if it is invalid. So no need to do it here. */
1874 tl_assert(okind <= 3);
1875 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1876 tl_assert(here);
1877 ecu = VG_(get_ECU_from_ExeContext)(here);
1878 tl_assert(VG_(is_plausible_ECU)(ecu));
1879 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1882 static
1883 void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
1885 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1888 static
1889 void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
1891 MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1894 void MC_(make_mem_defined) ( Addr a, SizeT len )
1896 PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
1897 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1898 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1899 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1900 ocache_sarp_Clear_Origins ( a, len );
1903 __attribute__((unused))
1904 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
1906 MC_(make_mem_defined)(a, len);
1909 /* For each byte in [a,a+len), if the byte is addressable, make it be
1910 defined, but if it isn't addressable, leave it alone. In other
1911 words, a version of MC_(make_mem_defined) that doesn't mess with
1912 addressability. Low-performance implementation. */
1913 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1915 SizeT i;
1916 UChar vabits2;
1917 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1918 for (i = 0; i < len; i++) {
1919 vabits2 = get_vabits2( a+i );
1920 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1921 set_vabits2(a+i, VA_BITS2_DEFINED);
1922 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1923 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1929 /* Similarly (needed for mprotect handling ..) */
1930 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1932 SizeT i;
1933 UChar vabits2;
1934 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1935 for (i = 0; i < len; i++) {
1936 vabits2 = get_vabits2( a+i );
1937 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1938 set_vabits2(a+i, VA_BITS2_DEFINED);
1939 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1940 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1946 /* --- Block-copy permissions (needed for implementing realloc() and
1947 sys_mremap). --- */
1949 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1951 SizeT i, j;
1952 UChar vabits2, vabits8;
1953 Bool aligned, nooverlap;
1955 DEBUG("MC_(copy_address_range_state)\n");
1956 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);
1958 if (len == 0 || src == dst)
1959 return;
1961 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1962 nooverlap = src+len <= dst || dst+len <= src;
1964 if (nooverlap && aligned) {
1966 /* Vectorised fast case, when no overlap and suitably aligned */
1967 /* vector loop */
1968 i = 0;
1969 while (len >= 4) {
1970 vabits8 = get_vabits8_for_aligned_word32( src+i );
1971 set_vabits8_for_aligned_word32( dst+i, vabits8 );
1972 if (LIKELY(VA_BITS8_DEFINED == vabits8
1973 || VA_BITS8_UNDEFINED == vabits8
1974 || VA_BITS8_NOACCESS == vabits8)) {
1975 /* do nothing */
1976 } else {
1977 /* have to copy secondary map info */
1978 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1979 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1980 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1981 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1982 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1983 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1984 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1985 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1987 i += 4;
1988 len -= 4;
1990 /* fixup loop */
1991 while (len >= 1) {
1992 vabits2 = get_vabits2( src+i );
1993 set_vabits2( dst+i, vabits2 );
1994 if (VA_BITS2_PARTDEFINED == vabits2) {
1995 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1997 i++;
1998 len--;
2001 } else {
2003 /* We have to do things the slow way */
2004 if (src < dst) {
2005 for (i = 0, j = len-1; i < len; i++, j--) {
2006 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
2007 vabits2 = get_vabits2( src+j );
2008 set_vabits2( dst+j, vabits2 );
2009 if (VA_BITS2_PARTDEFINED == vabits2) {
2010 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
2015 if (src > dst) {
2016 for (i = 0; i < len; i++) {
2017 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
2018 vabits2 = get_vabits2( src+i );
2019 set_vabits2( dst+i, vabits2 );
2020 if (VA_BITS2_PARTDEFINED == vabits2) {
2021 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
2030 /*------------------------------------------------------------*/
2031 /*--- Origin tracking stuff - cache basics ---*/
2032 /*------------------------------------------------------------*/
2034 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
2035 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2037 Note that this implementation draws inspiration from the "origin
2038 tracking by value piggybacking" scheme described in "Tracking Bad
2039 Apples: Reporting the Origin of Null and Undefined Value Errors"
2040 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
2041 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
2042 implemented completely differently.
2044 Origin tags and ECUs -- about the shadow values
2045 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2047 This implementation tracks the defining point of all uninitialised
2048 values using so called "origin tags", which are 32-bit integers,
2049 rather than using the values themselves to encode the origins. The
2050 latter, so-called "value piggybacking", is what the OOPSLA07 paper
2051 describes.
2053 Origin tags, as tracked by the machinery below, are 32-bit unsigned
2054 ints (UInts), regardless of the machine's word size. Each tag
2055 comprises an upper 30-bit ECU field and a lower 2-bit
2056 'kind' field. The ECU field is a number given out by m_execontext
2057 and has a 1-1 mapping with ExeContext*s. An ECU can be used
2058 directly as an origin tag (otag), but in fact we want to put
2059 additional information into the 'kind' field to indicate roughly where the
2060 tag came from. This helps print more understandable error messages
2061 for the user -- it has no other purpose. In summary:
2063 * Both ECUs and origin tags are represented as 32-bit words
2065 * m_execontext and the core-tool interface deal purely in ECUs.
2066 They have no knowledge of origin tags - that is a purely
2067 Memcheck-internal matter.
2069 * all valid ECUs have the lowest 2 bits zero and at least
2070 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2072 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2073 constants defined in mc_include.h.
2075 * to convert an otag back to an ECU, AND it with ~3
2077 One important fact is that no valid otag is zero. A zero otag is
2078 used by the implementation to indicate "no origin", which could
2079 mean that either the value is defined, or it is undefined but the
2080 implementation somehow managed to lose the origin.
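As a small sketch of those conversions (using names that already appear
in this file; illustrative only, not a quote of real code, and assuming
'tid' is a valid ThreadId):

   ExeContext* ec   = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   UInt        ecu  = VG_(get_ECU_from_ExeContext)( ec );  // low 2 bits are zero
   UInt        otag = ecu | MC_OKIND_STACK;                // attach a 'kind'
   UInt        back = otag & ~3u;                          // recover the ECU
   tl_assert(back == ecu && otag != 0);                    // any valid ECU gives a nonzero otag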
2082 The ECU used for memory created by malloc etc is derived from the
2083 stack trace at the time the malloc etc happens. This means the
2084 mechanism can show the exact allocation point for heap-created
2085 uninitialised values.
2087 In contrast, it is simply too expensive to create a complete
2088 backtrace for each stack allocation. Therefore we merely use a
2089 depth-1 backtrace for stack allocations, which can be done once at
2090 translation time, rather than N times at run time. The result of
2091 this is that, for stack created uninitialised values, Memcheck can
2092 only show the allocating function, and not what called it.
2093 Furthermore, compilers tend to move the stack pointer just once at
2094 the start of the function, to allocate all locals, and so in fact
2095 the stack origin almost always simply points to the opening brace
2096 of the function. Net result is, for stack origins, the mechanism
2097 can tell you in which function the undefined value was created, but
2098 that's all. Users will need to carefully check all locals in the
2099 specified function.
2101 Shadowing registers and memory
2102 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2104 Memory is shadowed using a two level cache structure (ocacheL1 and
2105 ocacheL2). Memory references are first directed to ocacheL1. This
2106 is a traditional 2-way set associative cache with 32-byte lines and
2107 approximate LRU replacement within each set.
2109 A naive implementation would require storing one 32 bit otag for
2110 each byte of memory covered, a 4:1 space overhead. Instead, there
2111 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2112 that shows which of the 4 bytes have that shadow value and which
2113 have a shadow value of zero (indicating no origin). Hence a lot of
2114 space is saved, but the cost is that only one different origin per
2115 4 bytes of address space can be represented. This is a source of
2116 imprecision, but how much of a problem it really is remains to be
2117 seen.
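To make the encoding concrete, here is a sketch (not the real load
helper, which appears later in this file) of how the otag for the
single byte at address 'a' would be recovered from its cache line:

   OCacheLine* line    = find_OCacheLine( a );
   UWord       lineoff = oc_line_offset( a );  // which 32-bit group in the line
   UWord       byteoff = a & 3;                // which byte within that group
   UInt        otag    = (line->descr[lineoff] & (1 << byteoff))
                            ? line->w32[lineoff]
                            : 0;               // no origin known for this byte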
2119 A cache line that contains all zeroes ("no origins") contains no
2120 useful information, and can be ejected from the L1 cache "for
2121 free", in the sense that a read miss on the L1 causes a line of
2122 zeroes to be installed. However, ejecting a line containing
2123 nonzeroes risks losing origin information permanently. In order to
2124 prevent such lossage, ejected nonzero lines are placed in a
2125 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2126 lines. This can grow arbitrarily large, and so should ensure that
2127 Memcheck runs out of memory in preference to losing useful origin
2128 info due to cache size limitations.
2130 Shadowing registers is a bit tricky, because the shadow values are
2131 32 bits, regardless of the size of the register. That gives a
2132 problem for registers smaller than 32 bits. The solution is to
2133 find spaces in the guest state that are unused, and use those to
2134 shadow guest state fragments smaller than 32 bits. For example, on
2135 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
2136 shadow are allocated for the register's otag, then there are still
2137 12 bytes left over which could be used to shadow 3 other values.
2139 This implies there is some non-obvious mapping from guest state
2140 (start,length) pairs to the relevant shadow offset (for the origin
2141 tags). And it is unfortunately guest-architecture specific. The
2142 mapping is contained in mc_machine.c, which is quite lengthy but
2143 straightforward.
2145 Instrumenting the IR
2146 ~~~~~~~~~~~~~~~~~~~~
2148 Instrumentation is largely straightforward, and done by the
2149 functions schemeE and schemeS in mc_translate.c. These generate
2150 code for handling the origin tags of expressions (E) and statements
2151 (S) respectively. The rather strange names are a reference to the
2152 "compilation schemes" shown in Simon Peyton Jones' book "The
2153 Implementation of Functional Programming Languages" (Prentice Hall,
2154 1987, see
2155 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2157 schemeS merely arranges to move shadow values around the guest
2158 state to track the incoming IR. schemeE is largely trivial too.
2159 The only significant point is how to compute the otag corresponding
2160 to binary (or ternary, quaternary, etc) operator applications. The
2161 rule is simple: just take whichever value is larger (32-bit
2162 unsigned max). Constants get the special value zero. Hence this
2163 rule always propagates a nonzero (known) otag in preference to a
2164 zero (unknown, or more likely, value-is-defined) tag, as we want.
2165 If two different undefined values are inputs to a binary operator
2166 application, then which is propagated is arbitrary, but that
2167 doesn't matter, since the program is erroneous in using either of
2168 the values, and so there's no point in attempting to propagate
2169 both.
2171 Since constants are abstracted to (otag) zero, much of the
2172 instrumentation code can be folded out without difficulty by the
2173 generic post-instrumentation IR cleanup pass, using these rules:
2174 Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y are
2175 constants is evaluated at JIT time, followed by removal of the
2176 resulting dead code. In practice this causes surprisingly few Max32Us to
2177 survive through to backend code generation.
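Stated as code, the propagation rule for a binary application is simply
the following (a sketch, not a quote of the instrumenter):

   static UInt otag_of_binop ( UInt otag_x, UInt otag_y ) {
      return otag_x > otag_y ? otag_x : otag_y;   // Max32U
   }

Since constants carry otag zero, otag_of_binop(0, t) == t, so a defined
operand never masks the origin of an undefined one.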
2179 Integration with the V-bits machinery
2180 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2182 This is again largely straightforward. Mostly the otag and V bits
2183 stuff are independent. The only point of interaction is when the V
2184 bits instrumenter creates a call to a helper function to report an
2185 uninitialised value error -- in that case it must first use schemeE
2186 to get hold of the origin tag expression for the value, and pass
2187 that to the helper too.
2189 There is the usual stuff to do with setting address range
2190 permissions. When memory is painted undefined, we must also know
2191 the origin tag to paint with, which involves some tedious plumbing,
2192 particularly to do with the fast case stack handlers. When memory
2193 is painted defined or noaccess then the origin tags must be forced
2194 to zero.
2196 One of the goals of the implementation was to ensure that the
2197 non-origin tracking mode isn't slowed down at all. To do this,
2198 various functions to do with memory permissions setting (again,
2199 mostly pertaining to the stack) are duplicated for the with- and
2200 without-otag case.
2202 Dealing with stack redzones, and the NIA cache
2203 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2205 This is one of the few non-obvious parts of the implementation.
2207 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2208 reserved area below the stack pointer, that can be used as scratch
2209 space by compiler generated code for functions. In the Memcheck
2210 sources this is referred to as the "stack redzone". The important
2211 thing here is that such redzones are considered volatile across
2212 function calls and returns. So Memcheck takes care to mark them as
2213 undefined for each call and return, on the afflicted platforms.
2214 Past experience shows this is essential in order to get reliable
2215 messages about uninitialised values that come from the stack.
2217 So the question is, when we paint a redzone undefined, what origin
2218 tag should we use for it? Consider a function f() calling g(). If
2219 we paint the redzone using an otag derived from the ExeContext of
2220 the CALL/BL instruction in f, then any errors in g causing it to
2221 use uninitialised values that happen to lie in the redzone, will be
2222 reported as having their origin in f. Which is highly confusing.
2224 The same applies for returns: if, on a return, we paint the redzone
2225 using an origin tag derived from the ExeContext of the RET/BLR
2226 instruction in g, then any later errors in f causing it to use
2227 uninitialised values in the redzone, will be reported as having
2228 their origin in g. Which is just as confusing.
2230 To do it right, in both cases we need to use an origin tag which
2231 pertains to the instruction which dynamically follows the CALL/BL
2232 or RET/BLR. In short, one derived from the NIA - the "next
2233 instruction address".
2235 To make this work, Memcheck's redzone-painting helper,
2236 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2237 NIA. It converts the NIA to a 1-element ExeContext, and uses that
2238 ExeContext's ECU as the basis for the otag used to paint the
2239 redzone. The expensive part of this is converting an NIA into an
2240 ECU, since this happens once for every call and every return. So
2241 we use a simple 511-line, 2-way set associative cache
2242 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2243 the cost out.
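In outline, that lookup amounts to something like the following sketch.
The entry layout and the name of the depth-1 ExeContext helper used on a
miss are assumptions for illustration, not the real declarations:

   typedef struct { Addr nia0; UInt ecu0;     // way 0 (most recently used)
                    Addr nia1; UInt ecu1; }   // way 1
           NiaCacheEnt;
   static NiaCacheEnt nia_to_ecu_cache[511];

   static UInt nia_to_ecu ( Addr nia ) {
      NiaCacheEnt* ent = &nia_to_ecu_cache[nia % 511];
      if (ent->nia0 == nia) return ent->ecu0;  // way-0 hit
      if (ent->nia1 == nia) return ent->ecu1;  // way-1 hit
      // miss: make a depth-1 ExeContext for nia, cache its ECU, and
      // demote the previous way-0 entry to way 1
      ExeContext* ec  = VG_(make_depth_1_ExeContext_from_Addr)( nia );
      UInt        ecu = VG_(get_ECU_from_ExeContext)( ec );
      ent->nia1 = ent->nia0;  ent->ecu1 = ent->ecu0;
      ent->nia0 = nia;        ent->ecu0 = ecu;
      return ecu;
   }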
2245 Further background comments
2246 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2248 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
2249 > it really just the address of the relevant ExeContext?
2251 Well, it's not the address, but a value which has a 1-1 mapping
2252 with ExeContexts, and is guaranteed not to be zero, since zero
2253 denotes (to memcheck) "unknown origin or defined value". So these
2254 UInts are just numbers starting at 4 and incrementing by 4; each
2255 ExeContext is given a number when it is created. (*** NOTE this
2256 confuses otags and ECUs; see comments above ***).
2258 Making these otags 32-bit regardless of the machine's word size
2259 makes the 64-bit implementation easier (next para). And it doesn't
2260 really limit us in any way, since for the tags to overflow would
2261 require that the program somehow caused 2^30-1 different
2262 ExeContexts to be created, in which case it is probably in deep
2263 trouble. Not to mention V will have soaked up many tens of
2264 gigabytes of memory merely to store them all.
2266 So having 64-bit origins doesn't really buy you anything, and has
2267 the following downsides:
2269 Suppose that instead, an otag is a UWord. This would mean that, on
2270 a 64-bit target,
2272 1. It becomes hard to shadow any element of guest state which is
2273 smaller than 8 bytes. To do so means you'd need to find some
2274 8-byte-sized hole in the guest state which you don't want to
2275 shadow, and use that instead to hold the otag. On ppc64, the
2276 condition code register(s) are split into 20 UChar sized pieces,
2277 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2278 and so that would entail finding 160 bytes somewhere else in the
2279 guest state.
2281 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2282 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2283 same) and so I had to look for 4 untracked otag-sized areas in
2284 the guest state to make that possible.
2286 The same problem exists of course when origin tags are only 32
2287 bits, but it's less extreme.
2289 2. (More compelling) it doubles the size of the origin shadow
2290 memory. Given that the shadow memory is organised as a fixed
2291 size cache, and that accuracy of tracking is limited by origins
2292 falling out the cache due to space conflicts, this isn't good.
2294 > Another question: is the origin tracking perfect, or are there
2295 > cases where it fails to determine an origin?
2297 It is imperfect for at least the following reasons, and
2298 probably more:
2300 * Insufficient capacity in the origin cache. When a line is
2301 evicted from the cache it is gone forever, and so subsequent
2302 queries for the line produce zero, indicating no origin
2303 information. Interestingly, a line containing all zeroes can be
2304 evicted "free" from the cache, since it contains no useful
2305 information, so there is scope perhaps for some cleverer cache
2306 management schemes. (*** NOTE, with the introduction of the
2307 second level origin tag cache, ocacheL2, this is no longer a
2308 problem. ***)
2310 * The origin cache only stores one otag per 32-bits of address
2311 space, plus 4 bits indicating which of the 4 bytes has that tag
2312 and which are considered defined. The result is that if two
2313 undefined bytes in the same word are stored in memory, the first
2314 stored byte's origin will be lost and replaced by the origin for
2315 the second byte.
2317 * Nonzero origin tags for defined values. Consider a binary
2318 operator application op(x,y). Suppose y is undefined (and so has
2319 a valid nonzero origin tag), and x is defined, but erroneously
2320 has a nonzero origin tag (defined values should have tag zero).
2321 If the erroneous tag has a numeric value greater than y's tag,
2322 then the rule for propagating origin tags through binary
2323 operations, which is simply to take the unsigned max of the two
2324 tags, will erroneously propagate x's tag rather than y's.
2326 * Some obscure uses of x86/amd64 byte registers can cause lossage
2327 or confusion of origins. %AH .. %DH are treated as different
2328 from, and unrelated to, their parent registers, %EAX .. %EDX.
2329 So some weird sequences like
2331 movb undefined-value, %AH
2332 movb defined-value, %AL
2333 .. use %AX or %EAX ..
2335 will cause the origin attributed to %AH to be ignored, since %AL,
2336 %AX, %EAX are treated as the same register, and %AH as a
2337 completely separate one.
2339 But having said all that, it actually seems to work fairly well in
2340 practice.
2343 static UWord stats_ocacheL1_find = 0;
2344 static UWord stats_ocacheL1_found_at_1 = 0;
2345 static UWord stats_ocacheL1_found_at_N = 0;
2346 static UWord stats_ocacheL1_misses = 0;
2347 static UWord stats_ocacheL1_lossage = 0;
2348 static UWord stats_ocacheL1_movefwds = 0;
2350 static UWord stats__ocacheL2_refs = 0;
2351 static UWord stats__ocacheL2_misses = 0;
2352 static UWord stats__ocacheL2_n_nodes_max = 0;
2354 /* Cache of 32-bit values, one every 32 bits of address space */
2356 #define OC_BITS_PER_LINE 5
2357 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2359 static INLINE UWord oc_line_offset ( Addr a ) {
2360 return (a >> 2) & (OC_W32S_PER_LINE - 1);
2362 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2363 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2366 #define OC_LINES_PER_SET 2
2368 #define OC_N_SET_BITS 20
2369 #define OC_N_SETS (1 << OC_N_SET_BITS)
2371 /* These settings give:
2372 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2373 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
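(Derivation of the above, assuming no padding beyond natural alignment:
each OCacheLine holds 8 UInts of otag data -- the 32 "useful" bytes --
plus 8 descr bytes and one Addr tag, i.e. 48 bytes on a 64-bit host and
44 bytes on a 32-bit host. With 2 lines per set and 2^20 sets, that is
48 * 2 * 1048576 = 100,663,296 bytes or 44 * 2 * 1048576 = 92,274,688
bytes respectively, of which 32 * 2 * 1048576 = 67,108,864 bytes are
the otags themselves.)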
2376 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2379 typedef
2380 struct {
2381 Addr tag;
2382 UInt w32[OC_W32S_PER_LINE];
2383 UChar descr[OC_W32S_PER_LINE];
2385 OCacheLine;
2387 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2388 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2389 and 'z' if all the represented tags are zero. */
2390 static UChar classify_OCacheLine ( OCacheLine* line )
2392 UWord i;
2393 if (line->tag == 1/*invalid*/)
2394 return 'e'; /* EMPTY */
2395 tl_assert(is_valid_oc_tag(line->tag));
2396 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2397 tl_assert(0 == ((~0xF) & line->descr[i]));
2398 if (line->w32[i] > 0 && line->descr[i] > 0)
2399 return 'n'; /* NONZERO - contains useful info */
2401 return 'z'; /* ZERO - no useful info */
2404 typedef
2405 struct {
2406 OCacheLine line[OC_LINES_PER_SET];
2408 OCacheSet;
2410 typedef
2411 struct {
2412 OCacheSet set[OC_N_SETS];
2414 OCache;
2416 static OCache* ocacheL1 = NULL;
2417 static UWord ocacheL1_event_ctr = 0;
2419 static void init_ocacheL2 ( void ); /* fwds */
2420 static void init_OCache ( void )
2422 UWord line, set;
2423 tl_assert(MC_(clo_mc_level) >= 3);
2424 tl_assert(ocacheL1 == NULL);
2425 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2426 if (ocacheL1 == NULL) {
2427 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2428 sizeof(OCache) );
2430 tl_assert(ocacheL1 != NULL);
2431 for (set = 0; set < OC_N_SETS; set++) {
2432 for (line = 0; line < OC_LINES_PER_SET; line++) {
2433 ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2436 init_ocacheL2();
2439 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2441 OCacheLine tmp;
2442 stats_ocacheL1_movefwds++;
2443 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2444 tmp = set->line[lineno-1];
2445 set->line[lineno-1] = set->line[lineno];
2446 set->line[lineno] = tmp;
2449 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2450 UWord i;
2451 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2452 line->w32[i] = 0; /* NO ORIGIN */
2453 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2455 line->tag = tag;
2458 //////////////////////////////////////////////////////////////
2459 //// OCache backing store
2461 static OSet* ocacheL2 = NULL;
2463 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2464 return VG_(malloc)(cc, szB);
2466 static void ocacheL2_free ( void* v ) {
2467 VG_(free)( v );
2470 /* Stats: # nodes currently in tree */
2471 static UWord stats__ocacheL2_n_nodes = 0;
2473 static void init_ocacheL2 ( void )
2475 tl_assert(!ocacheL2);
2476 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2477 tl_assert(0 == offsetof(OCacheLine,tag));
2478 ocacheL2
2479 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2480 NULL, /* fast cmp */
2481 ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2482 stats__ocacheL2_n_nodes = 0;
2485 /* Find line with the given tag in the tree, or NULL if not found. */
2486 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2488 OCacheLine* line;
2489 tl_assert(is_valid_oc_tag(tag));
2490 stats__ocacheL2_refs++;
2491 line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2492 return line;
2495 /* Delete the line with the given tag from the tree, if it is present, and
2496 free up the associated memory. */
2497 static void ocacheL2_del_tag ( Addr tag )
2499 OCacheLine* line;
2500 tl_assert(is_valid_oc_tag(tag));
2501 stats__ocacheL2_refs++;
2502 line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2503 if (line) {
2504 VG_(OSetGen_FreeNode)(ocacheL2, line);
2505 tl_assert(stats__ocacheL2_n_nodes > 0);
2506 stats__ocacheL2_n_nodes--;
2510 /* Add a copy of the given line to the tree. It must not already be
2511 present. */
2512 static void ocacheL2_add_line ( OCacheLine* line )
2514 OCacheLine* copy;
2515 tl_assert(is_valid_oc_tag(line->tag));
2516 copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2517 *copy = *line;
2518 stats__ocacheL2_refs++;
2519 VG_(OSetGen_Insert)( ocacheL2, copy );
2520 stats__ocacheL2_n_nodes++;
2521 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2522 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2525 ////
2526 //////////////////////////////////////////////////////////////
2528 __attribute__((noinline))
2529 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2531 OCacheLine *victim, *inL2;
2532 UChar c;
2533 UWord line;
2534 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2535 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2536 UWord tag = a & tagmask;
2537 tl_assert(setno >= 0 && setno < OC_N_SETS);
2539 /* we already tried line == 0; skip therefore. */
2540 for (line = 1; line < OC_LINES_PER_SET; line++) {
2541 if (ocacheL1->set[setno].line[line].tag == tag) {
2542 if (line == 1) {
2543 stats_ocacheL1_found_at_1++;
2544 } else {
2545 stats_ocacheL1_found_at_N++;
2547 if (UNLIKELY(0 == (ocacheL1_event_ctr++
2548 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2549 moveLineForwards( &ocacheL1->set[setno], line );
2550 line--;
2552 return &ocacheL1->set[setno].line[line];
2556 /* A miss. Use the last slot. Implicitly this means we're
2557 ejecting the line in the last slot. */
2558 stats_ocacheL1_misses++;
2559 tl_assert(line == OC_LINES_PER_SET);
2560 line--;
2561 tl_assert(line > 0);
2563 /* First, move the to-be-ejected line to the L2 cache. */
2564 victim = &ocacheL1->set[setno].line[line];
2565 c = classify_OCacheLine(victim);
2566 switch (c) {
2567 case 'e':
2568 /* the line is empty (has invalid tag); ignore it. */
2569 break;
2570 case 'z':
2571 /* line contains zeroes. We must ensure the backing store is
2572 updated accordingly, either by copying the line there
2573 verbatim, or by ensuring it isn't present there. We
2574 choose the latter on the basis that it reduces the size of
2575 the backing store. */
2576 ocacheL2_del_tag( victim->tag );
2577 break;
2578 case 'n':
2579 /* line contains at least one real, useful origin. Copy it
2580 to the backing store. */
2581 stats_ocacheL1_lossage++;
2582 inL2 = ocacheL2_find_tag( victim->tag );
2583 if (inL2) {
2584 *inL2 = *victim;
2585 } else {
2586 ocacheL2_add_line( victim );
2588 break;
2589 default:
2590 tl_assert(0);
2593 /* Now we must reload the L1 cache from the backing tree, if
2594 possible. */
2595 tl_assert(tag != victim->tag); /* stay sane */
2596 inL2 = ocacheL2_find_tag( tag );
2597 if (inL2) {
2598 /* We're in luck. It's in the L2. */
2599 ocacheL1->set[setno].line[line] = *inL2;
2600 } else {
2601 /* Missed at both levels of the cache hierarchy. We have to
2602 declare it as full of zeroes (unknown origins). */
2603 stats__ocacheL2_misses++;
2604 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2607 /* Move it one forwards */
2608 moveLineForwards( &ocacheL1->set[setno], line );
2609 line--;
2611 return &ocacheL1->set[setno].line[line];
2614 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2616 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2617 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2618 UWord tag = a & tagmask;
2620 stats_ocacheL1_find++;
2622 if (OC_ENABLE_ASSERTIONS) {
2623 tl_assert(setno >= 0 && setno < OC_N_SETS);
2624 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2627 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2628 return &ocacheL1->set[setno].line[0];
2631 return find_OCacheLine_SLOW( a );
2634 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2636 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2637 //// Set the origins for a+0 .. a+7
2638 { OCacheLine* line;
2639 UWord lineoff = oc_line_offset(a);
2640 if (OC_ENABLE_ASSERTIONS) {
2641 tl_assert(lineoff >= 0
2642 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2644 line = find_OCacheLine( a );
2645 line->descr[lineoff+0] = 0xF;
2646 line->descr[lineoff+1] = 0xF;
2647 line->w32[lineoff+0] = otag;
2648 line->w32[lineoff+1] = otag;
2650 //// END inlined, specialised version of MC_(helperc_b_store8)
2654 /*------------------------------------------------------------*/
2655 /*--- Aligned fast case permission setters, ---*/
2656 /*--- for dealing with stacks ---*/
2657 /*------------------------------------------------------------*/
2659 /*--------------------- 32-bit ---------------------*/
2661 /* Nb: by "aligned" here we mean 4-byte aligned */
2663 static INLINE void make_aligned_word32_undefined ( Addr a )
2665 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);
2667 #ifndef PERF_FAST_STACK2
2668 make_mem_undefined(a, 4);
2669 #else
2671 UWord sm_off;
2672 SecMap* sm;
2674 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2675 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
2676 make_mem_undefined(a, 4);
2677 return;
2680 sm = get_secmap_for_writing_low(a);
2681 sm_off = SM_OFF(a);
2682 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2684 #endif
2687 static INLINE
2688 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2690 make_aligned_word32_undefined(a);
2691 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2692 //// Set the origins for a+0 .. a+3
2693 { OCacheLine* line;
2694 UWord lineoff = oc_line_offset(a);
2695 if (OC_ENABLE_ASSERTIONS) {
2696 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2698 line = find_OCacheLine( a );
2699 line->descr[lineoff] = 0xF;
2700 line->w32[lineoff] = otag;
2702 //// END inlined, specialised version of MC_(helperc_b_store4)
2705 static INLINE
2706 void make_aligned_word32_noaccess ( Addr a )
2708 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);
2710 #ifndef PERF_FAST_STACK2
2711 MC_(make_mem_noaccess)(a, 4);
2712 #else
2714 UWord sm_off;
2715 SecMap* sm;
2717 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2718 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
2719 MC_(make_mem_noaccess)(a, 4);
2720 return;
2723 sm = get_secmap_for_writing_low(a);
2724 sm_off = SM_OFF(a);
2725 sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2727 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2728 //// Set the origins for a+0 .. a+3.
2729 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2730 OCacheLine* line;
2731 UWord lineoff = oc_line_offset(a);
2732 if (OC_ENABLE_ASSERTIONS) {
2733 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2735 line = find_OCacheLine( a );
2736 line->descr[lineoff] = 0;
2738 //// END inlined, specialised version of MC_(helperc_b_store4)
2740 #endif
2743 /*--------------------- 64-bit ---------------------*/
2745 /* Nb: by "aligned" here we mean 8-byte aligned */
2747 static INLINE void make_aligned_word64_undefined ( Addr a )
2749 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);
2751 #ifndef PERF_FAST_STACK2
2752 make_mem_undefined(a, 8);
2753 #else
2755 UWord sm_off16;
2756 SecMap* sm;
2758 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2759 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
2760 make_mem_undefined(a, 8);
2761 return;
2764 sm = get_secmap_for_writing_low(a);
2765 sm_off16 = SM_OFF_16(a);
2766 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
2768 #endif
2771 static INLINE
2772 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2774 make_aligned_word64_undefined(a);
2775 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2776 //// Set the origins for a+0 .. a+7
2777 { OCacheLine* line;
2778 UWord lineoff = oc_line_offset(a);
2779 tl_assert(lineoff >= 0
2780 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2781 line = find_OCacheLine( a );
2782 line->descr[lineoff+0] = 0xF;
2783 line->descr[lineoff+1] = 0xF;
2784 line->w32[lineoff+0] = otag;
2785 line->w32[lineoff+1] = otag;
2787 //// END inlined, specialised version of MC_(helperc_b_store8)
2790 static INLINE
2791 void make_aligned_word64_noaccess ( Addr a )
2793 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);
2795 #ifndef PERF_FAST_STACK2
2796 MC_(make_mem_noaccess)(a, 8);
2797 #else
2799 UWord sm_off16;
2800 SecMap* sm;
2802 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2803 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
2804 MC_(make_mem_noaccess)(a, 8);
2805 return;
2808 sm = get_secmap_for_writing_low(a);
2809 sm_off16 = SM_OFF_16(a);
2810 sm->vabits16[sm_off16] = VA_BITS16_NOACCESS;
2812 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2813 //// Clear the origins for a+0 .. a+7.
2814 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2815 OCacheLine* line;
2816 UWord lineoff = oc_line_offset(a);
2817 tl_assert(lineoff >= 0
2818 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2819 line = find_OCacheLine( a );
2820 line->descr[lineoff+0] = 0;
2821 line->descr[lineoff+1] = 0;
2823 //// END inlined, specialised version of MC_(helperc_b_store8)
2825 #endif
2829 /*------------------------------------------------------------*/
2830 /*--- Stack pointer adjustment ---*/
2831 /*------------------------------------------------------------*/
2833 #ifdef PERF_FAST_STACK
2834 # define MAYBE_USED
2835 #else
2836 # define MAYBE_USED __attribute__((unused))
2837 #endif
2839 /*--------------- adjustment by 4 bytes ---------------*/
2841 MAYBE_USED
2842 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2844 UInt otag = ecu | MC_OKIND_STACK;
2845 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
2846 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2847 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2848 } else {
2849 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2853 MAYBE_USED
2854 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2856 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
2857 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2858 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2859 } else {
2860 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2864 MAYBE_USED
2865 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2867 PROF_EVENT(MCPE_DIE_MEM_STACK_4);
2868 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2869 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2870 } else {
2871 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2875 /*--------------- adjustment by 8 bytes ---------------*/
2877 MAYBE_USED
2878 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2880 UInt otag = ecu | MC_OKIND_STACK;
2881 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
2882 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2883 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2884 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2885 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2886 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2887 } else {
2888 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2892 MAYBE_USED
2893 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2895 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
2896 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2897 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2898 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2899 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2900 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2901 } else {
2902 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2906 MAYBE_USED
2907 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2909 PROF_EVENT(MCPE_DIE_MEM_STACK_8);
2910 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2911 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2912 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2913 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2914 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2915 } else {
2916 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2920 /*--------------- adjustment by 12 bytes ---------------*/
2922 MAYBE_USED
2923 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2925 UInt otag = ecu | MC_OKIND_STACK;
2926 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
2927 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2928 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2929 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2930 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2931 /* from previous test we don't have 8-alignment at offset +0,
2932 hence must have 8 alignment at offsets +4/-4. Hence safe to
2933 do 4 at +0 and then 8 at +4. */
2934 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2935 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2936 } else {
2937 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2941 MAYBE_USED
2942 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2944 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
2945 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2946 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2947 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2948 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2949 /* from previous test we don't have 8-alignment at offset +0,
2950 hence must have 8 alignment at offsets +4/-4. Hence safe to
2951 do 4 at +0 and then 8 at +4. */
2952 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2953 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2954 } else {
2955 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2959 MAYBE_USED
2960 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2962 PROF_EVENT(MCPE_DIE_MEM_STACK_12);
2963 /* Note the -12 in the test */
2964 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2965 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2966 -4. */
2967 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2968 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2969 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2970 /* We have 4-alignment at +0, but we don't have 8-alignment at
2971 -12. So we must have 8-alignment at -8. Hence do 4 at -12
2972 and then 8 at -8. */
2973 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2974 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2975 } else {
2976 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2980 /*--------------- adjustment by 16 bytes ---------------*/
2982 MAYBE_USED
2983 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2985 UInt otag = ecu | MC_OKIND_STACK;
2986 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
2987 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2988 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2989 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2990 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2991 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2992 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2993 Hence do 4 at +0, 8 at +4, 4 at +12. */
2994 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2995 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2996 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2997 } else {
2998 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
3002 MAYBE_USED
3003 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
3005 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
3006 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3007 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3008 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3009 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3010 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3011 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3012 Hence do 4 at +0, 8 at +4, 4 at +12. */
3013 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3014 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3015 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3016 } else {
3017 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
3021 MAYBE_USED
3022 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
3024 PROF_EVENT(MCPE_DIE_MEM_STACK_16);
3025 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3026 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
3027 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3028 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3029 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3030 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
3031 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3032 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3033 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3034 } else {
3035 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
3039 /*--------------- adjustment by 32 bytes ---------------*/
3041 MAYBE_USED
3042 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
3044 UInt otag = ecu | MC_OKIND_STACK;
3045 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3046 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3047 /* Straightforward */
3048 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3049 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3050 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3051 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3052 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3053 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3054 +0,+28. */
3055 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3056 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3057 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3058 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
3059 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
3060 } else {
3061 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
3065 MAYBE_USED
3066 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
3068 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3069 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3070 /* Straightforward */
3071 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3072 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3073 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3074 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3075 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3076 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3077 +0,+28. */
3078 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3079 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3080 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3081 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3082 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3083 } else {
3084 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3088 MAYBE_USED
3089 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3091 PROF_EVENT(MCPE_DIE_MEM_STACK_32);
3092 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3093 /* Straightforward */
3094 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3095 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3096 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3097 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3098 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3099 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
3100 4 at -32,-4. */
3101 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3102 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3103 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3104 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3105 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3106 } else {
3107 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3111 /*--------------- adjustment by 112 bytes ---------------*/
3113 MAYBE_USED
3114 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3116 UInt otag = ecu | MC_OKIND_STACK;
3117 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3118 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3119 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3120 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3121 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3122 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3123 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3124 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3125 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3126 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3127 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3128 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3129 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3130 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3131 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3132 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3133 } else {
3134 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3138 MAYBE_USED
3139 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3141 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3142 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3143 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3144 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3145 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3146 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3147 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3148 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3149 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3150 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3151 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3152 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3153 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3154 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3155 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3156 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3157 } else {
3158 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3162 MAYBE_USED
3163 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3165 PROF_EVENT(MCPE_DIE_MEM_STACK_112);
3166 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3167 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3168 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3169 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3170 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3171 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3172 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3173 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3174 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3175 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3176 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3177 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3178 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3179 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3180 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3181 } else {
3182 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3186 /*--------------- adjustment by 128 bytes ---------------*/
3188 MAYBE_USED
3189 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3191 UInt otag = ecu | MC_OKIND_STACK;
3192 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3193 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3194 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3195 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3196 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3197 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3198 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3199 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3200 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3201 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3202 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3203 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3204 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3205 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3206 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3207 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3208 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3209 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3210 } else {
3211 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3215 MAYBE_USED
3216 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3218 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3219 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3220 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3221 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3222 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3223 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3224 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3225 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3226 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3227 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3228 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3229 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3230 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3231 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3232 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3233 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3234 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3235 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3236 } else {
3237 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3241 MAYBE_USED
3242 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3244 PROF_EVENT(MCPE_DIE_MEM_STACK_128);
3245 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3246 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3247 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3248 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3249 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3250 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3251 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3252 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3253 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3254 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3255 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3256 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3257 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3258 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3259 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3260 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3261 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3262 } else {
3263 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3267 /*--------------- adjustment by 144 bytes ---------------*/
3269 MAYBE_USED
3270 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3272 UInt otag = ecu | MC_OKIND_STACK;
3273 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3274 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3275 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3276 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3277 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3278 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3279 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3280 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3281 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3282 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3283 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3284 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3285 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3286 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3287 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3288 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3289 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3290 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3291 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3292 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3293 } else {
3294 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3298 MAYBE_USED
3299 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3301 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3302 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3303 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3304 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3305 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3306 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3307 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3308 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3309 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3310 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3311 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3312 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3313 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3314 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3315 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3316 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3317 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3318 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3319 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3320 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3321 } else {
3322 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3326 MAYBE_USED
3327 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3329 PROF_EVENT(MCPE_DIE_MEM_STACK_144);
3330 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3331 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3332 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3333 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3334 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3335 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3336 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3337 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3338 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3339 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3340 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3341 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3342 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3343 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3344 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3345 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3346 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3347 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3348 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3349 } else {
3350 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3354 /*--------------- adjustment by 160 bytes ---------------*/
3356 MAYBE_USED
3357 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3359 UInt otag = ecu | MC_OKIND_STACK;
3360 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3361 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3362 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3363 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3364 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3365 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3366 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3367 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3368 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3369 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3370 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3371 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3372 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3373 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3374 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3375 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3376 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3377 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3378 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3379 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3380 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3381 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3382 } else {
3383 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3387 MAYBE_USED
3388 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3390 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3391 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3392 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3393 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3394 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3395 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3396 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3397 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3398 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3399 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3400 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3401 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3402 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3403 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3404 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3405 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3406 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3407 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3408 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3409 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3410 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3411 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3412 } else {
3413 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3417 MAYBE_USED
3418 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3420 PROF_EVENT(MCPE_DIE_MEM_STACK_160);
3421 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3422 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3423 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3424 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3425 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3426 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3427 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3428 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3429 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3430 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3431 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3432 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3433 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3434 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3435 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3436 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3437 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3438 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3439 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3440 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3441 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3442 } else {
3443 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3447 /*--------------- adjustment by N bytes ---------------*/
3449 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3451 UInt otag = ecu | MC_OKIND_STACK;
3452 PROF_EVENT(MCPE_NEW_MEM_STACK);
3453 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3456 static void mc_new_mem_stack ( Addr a, SizeT len )
3458 PROF_EVENT(MCPE_NEW_MEM_STACK);
3459 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3462 static void mc_die_mem_stack ( Addr a, SizeT len )
3464 PROF_EVENT(MCPE_DIE_MEM_STACK);
3465 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3469 /* The AMD64 ABI says:
3471 "The 128-byte area beyond the location pointed to by %rsp is considered
3472 to be reserved and shall not be modified by signal or interrupt
3473 handlers. Therefore, functions may use this area for temporary data
3474 that is not needed across function calls. In particular, leaf functions
3475 may use this area for their entire stack frame, rather than adjusting
3476 the stack pointer in the prologue and epilogue. This area is known as
3477 red zone [sic]."
3479 So after any call or return we need to mark this redzone as containing
3480 undefined values.
3482 Consider this: we're in function f. f calls g. g moves rsp down
3483 modestly (say 16 bytes) and writes stuff all over the red zone, making it
3484 defined. g returns. f is buggy and reads from parts of the red zone
3485 that it didn't write on. But because g filled that area in, f is going
3486 to be picking up defined V bits and so any errors from reading bits of
3487 the red zone it didn't write, will be missed. The only solution I could
3488 think of was to make the red zone undefined when g returns to f.
3490 This is in accordance with the ABI, which makes it clear the redzone
3491 is volatile across function calls.
3493 The problem occurs the other way round too: f could fill the RZ up
3494 with defined values and g could mistakenly read them. So the RZ
3495 also needs to be nuked on function calls. */
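/* A purely illustrative walk-through of the f/g scenario above -- an
   assumption-laden sketch, not Memcheck code, and not something portable C
   can express directly, since red-zone accesses are normally emitted by the
   compiler (leaf-function frames, spilled temporaries):

      g:   movq  %rdi, -8(%rsp)      -- scratch stored in the red zone;
           ...                          those bytes now carry defined V bits
           ret
      f:   call  g
           movq  -16(%rsp), %rax     -- bug: f never wrote -16(%rsp)

   Unless the helpers below re-mark [%rsp-128, %rsp) as undefined when g
   returns, the second movq picks up the defined V bits that g left behind
   and the error goes unreported. */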
3499 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3500 improved so as to have a lower miss rate. */
3502 static UWord stats__nia_cache_queries = 0;
3503 static UWord stats__nia_cache_misses = 0;
3505 typedef
3506 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
3507 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3508 WCacheEnt;
3510 #define N_NIA_TO_ECU_CACHE 511
3512 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3514 static void init_nia_to_ecu_cache ( void )
3516 UWord i;
3517 Addr zero_addr = 0;
3518 ExeContext* zero_ec;
3519 UInt zero_ecu;
3520 /* Fill all the slots with an entry for address zero, and the
3521 relevant otags accordingly. Hence the cache is initially filled
3522 with valid data. */
3523 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3524 tl_assert(zero_ec);
3525 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3526 tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3527 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3528 nia_to_ecu_cache[i].nia0 = zero_addr;
3529 nia_to_ecu_cache[i].ecu0 = zero_ecu;
3530 nia_to_ecu_cache[i].nia1 = zero_addr;
3531 nia_to_ecu_cache[i].ecu1 = zero_ecu;
3535 static inline UInt convert_nia_to_ecu ( Addr nia )
3537 UWord i;
3538 UInt ecu;
3539 ExeContext* ec;
3541 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3543 stats__nia_cache_queries++;
3544 i = nia % N_NIA_TO_ECU_CACHE;
3545 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3547 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3548 return nia_to_ecu_cache[i].ecu0;
3550 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3551 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3552 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3553 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3554 # undef SWAP
3555 return nia_to_ecu_cache[i].ecu0;
3558 stats__nia_cache_misses++;
3559 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3560 tl_assert(ec);
3561 ecu = VG_(get_ECU_from_ExeContext)(ec);
3562 tl_assert(VG_(is_plausible_ECU)(ecu));
3564 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3565 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3567 nia_to_ecu_cache[i].nia0 = nia;
3568 nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3569 return ecu;
3573 /* This marks the stack as addressable but undefined, after a call or
3574 return for a target that has an ABI-defined stack redzone. It
3575 happens quite a lot and needs to be fast. This is the version for
3576 origin tracking. The non-origin-tracking version is below. */
3577 VG_REGPARM(3)
3578 void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
3580 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
3581 if (0)
3582 VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
3583 base, len, nia );
3585 UInt ecu = convert_nia_to_ecu ( nia );
3586 tl_assert(VG_(is_plausible_ECU)(ecu));
3588 UInt otag = ecu | MC_OKIND_STACK;
3590 # if 0
3591 /* Slow(ish) version, which is fairly easily seen to be correct. */
3593 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3594 make_aligned_word64_undefined_w_otag(base + 0, otag);
3595 make_aligned_word64_undefined_w_otag(base + 8, otag);
3596 make_aligned_word64_undefined_w_otag(base + 16, otag);
3597 make_aligned_word64_undefined_w_otag(base + 24, otag);
3599 make_aligned_word64_undefined_w_otag(base + 32, otag);
3600 make_aligned_word64_undefined_w_otag(base + 40, otag);
3601 make_aligned_word64_undefined_w_otag(base + 48, otag);
3602 make_aligned_word64_undefined_w_otag(base + 56, otag);
3604 make_aligned_word64_undefined_w_otag(base + 64, otag);
3605 make_aligned_word64_undefined_w_otag(base + 72, otag);
3606 make_aligned_word64_undefined_w_otag(base + 80, otag);
3607 make_aligned_word64_undefined_w_otag(base + 88, otag);
3609 make_aligned_word64_undefined_w_otag(base + 96, otag);
3610 make_aligned_word64_undefined_w_otag(base + 104, otag);
3611 make_aligned_word64_undefined_w_otag(base + 112, otag);
3612 make_aligned_word64_undefined_w_otag(base + 120, otag);
3613 } else {
3614 MC_(make_mem_undefined_w_otag)(base, len, otag);
3616 # endif
3618 /* Idea is: go fast when
3619 * 8-aligned and length is 128
3620 * the sm is available in the main primary map
3621 * the address range falls entirely within a single secondary map
3622 If all those conditions hold, just update the V+A bits by writing
3623 directly into the vabits array. (If the sm was distinguished, this
3624 will make a copy and then write to it.) */
3626 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3627 /* Now we know the address range is suitably sized and aligned. */
3628 UWord a_lo = (UWord)(base);
3629 UWord a_hi = (UWord)(base + 128 - 1);
3630 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3631 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3632 /* Now we know the entire range is within the main primary map. */
3633 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3634 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3635 if (LIKELY(pm_off_lo == pm_off_hi)) {
3636 /* Now we know that the entire address range falls within a
3637 single secondary map, and that that secondary 'lives' in
3638 the main primary map. */
3639 SecMap* sm = get_secmap_for_writing_low(a_lo);
3640 UWord v_off16 = SM_OFF_16(a_lo);
3641 UShort* p = &sm->vabits16[v_off16];
3642 p[ 0] = VA_BITS16_UNDEFINED;
3643 p[ 1] = VA_BITS16_UNDEFINED;
3644 p[ 2] = VA_BITS16_UNDEFINED;
3645 p[ 3] = VA_BITS16_UNDEFINED;
3646 p[ 4] = VA_BITS16_UNDEFINED;
3647 p[ 5] = VA_BITS16_UNDEFINED;
3648 p[ 6] = VA_BITS16_UNDEFINED;
3649 p[ 7] = VA_BITS16_UNDEFINED;
3650 p[ 8] = VA_BITS16_UNDEFINED;
3651 p[ 9] = VA_BITS16_UNDEFINED;
3652 p[10] = VA_BITS16_UNDEFINED;
3653 p[11] = VA_BITS16_UNDEFINED;
3654 p[12] = VA_BITS16_UNDEFINED;
3655 p[13] = VA_BITS16_UNDEFINED;
3656 p[14] = VA_BITS16_UNDEFINED;
3657 p[15] = VA_BITS16_UNDEFINED;
3658 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3659 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3660 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3661 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3662 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3663 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3664 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3665 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3666 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3667 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3668 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3669 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3670 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3671 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3672 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3673 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3674 return;
3679 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3680 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3681 /* Now we know the address range is suitably sized and aligned. */
3682 UWord a_lo = (UWord)(base);
3683 UWord a_hi = (UWord)(base + 288 - 1);
3684 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3685 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3686 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3687 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3688 if (LIKELY(pm_off_lo == pm_off_hi)) {
3689 /* Now we know that the entire address range falls within a
3690 single secondary map, and that that secondary 'lives' in
3691 the main primary map. */
3692 SecMap* sm = get_secmap_for_writing_low(a_lo);
3693 UWord v_off16 = SM_OFF_16(a_lo);
3694 UShort* p = &sm->vabits16[v_off16];
3695 p[ 0] = VA_BITS16_UNDEFINED;
3696 p[ 1] = VA_BITS16_UNDEFINED;
3697 p[ 2] = VA_BITS16_UNDEFINED;
3698 p[ 3] = VA_BITS16_UNDEFINED;
3699 p[ 4] = VA_BITS16_UNDEFINED;
3700 p[ 5] = VA_BITS16_UNDEFINED;
3701 p[ 6] = VA_BITS16_UNDEFINED;
3702 p[ 7] = VA_BITS16_UNDEFINED;
3703 p[ 8] = VA_BITS16_UNDEFINED;
3704 p[ 9] = VA_BITS16_UNDEFINED;
3705 p[10] = VA_BITS16_UNDEFINED;
3706 p[11] = VA_BITS16_UNDEFINED;
3707 p[12] = VA_BITS16_UNDEFINED;
3708 p[13] = VA_BITS16_UNDEFINED;
3709 p[14] = VA_BITS16_UNDEFINED;
3710 p[15] = VA_BITS16_UNDEFINED;
3711 p[16] = VA_BITS16_UNDEFINED;
3712 p[17] = VA_BITS16_UNDEFINED;
3713 p[18] = VA_BITS16_UNDEFINED;
3714 p[19] = VA_BITS16_UNDEFINED;
3715 p[20] = VA_BITS16_UNDEFINED;
3716 p[21] = VA_BITS16_UNDEFINED;
3717 p[22] = VA_BITS16_UNDEFINED;
3718 p[23] = VA_BITS16_UNDEFINED;
3719 p[24] = VA_BITS16_UNDEFINED;
3720 p[25] = VA_BITS16_UNDEFINED;
3721 p[26] = VA_BITS16_UNDEFINED;
3722 p[27] = VA_BITS16_UNDEFINED;
3723 p[28] = VA_BITS16_UNDEFINED;
3724 p[29] = VA_BITS16_UNDEFINED;
3725 p[30] = VA_BITS16_UNDEFINED;
3726 p[31] = VA_BITS16_UNDEFINED;
3727 p[32] = VA_BITS16_UNDEFINED;
3728 p[33] = VA_BITS16_UNDEFINED;
3729 p[34] = VA_BITS16_UNDEFINED;
3730 p[35] = VA_BITS16_UNDEFINED;
3731 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3732 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3733 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3734 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3735 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3736 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3737 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3738 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3739 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3740 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3741 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3742 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3743 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3744 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3745 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3746 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3747 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3748 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3749 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3750 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3751 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3752 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3753 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3754 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3755 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3756 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3757 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3758 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3759 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3760 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3761 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3762 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3763 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3764 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3765 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3766 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3767 return;
3772 /* else fall into slow case */
3773 MC_(make_mem_undefined_w_otag)(base, len, otag);
3777 /* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
3778 specialised for the non-origin-tracking case. */
3779 VG_REGPARM(2)
3780 void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
3782 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
3783 if (0)
3784 VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
3785 base, len );
3787 # if 0
3788 /* Slow(ish) version, which is fairly easily seen to be correct. */
3790 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3791 make_aligned_word64_undefined(base + 0);
3792 make_aligned_word64_undefined(base + 8);
3793 make_aligned_word64_undefined(base + 16);
3794 make_aligned_word64_undefined(base + 24);
3796 make_aligned_word64_undefined(base + 32);
3797 make_aligned_word64_undefined(base + 40);
3798 make_aligned_word64_undefined(base + 48);
3799 make_aligned_word64_undefined(base + 56);
3801 make_aligned_word64_undefined(base + 64);
3802 make_aligned_word64_undefined(base + 72);
3803 make_aligned_word64_undefined(base + 80);
3804 make_aligned_word64_undefined(base + 88);
3806 make_aligned_word64_undefined(base + 96);
3807 make_aligned_word64_undefined(base + 104);
3808 make_aligned_word64_undefined(base + 112);
3809 make_aligned_word64_undefined(base + 120);
3810 } else {
3811 make_mem_undefined(base, len);
3813 # endif
3815 /* Idea is: go fast when
3816 * 8-aligned and length is 128
3817 * the sm is available in the main primary map
3818 * the address range falls entirely within a single secondary map
3819 If all those conditions hold, just update the V+A bits by writing
3820 directly into the vabits array. (If the sm was distinguished, this
3821 will make a copy and then write to it.) */
3823 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3824 /* Now we know the address range is suitably sized and aligned. */
3825 UWord a_lo = (UWord)(base);
3826 UWord a_hi = (UWord)(base + 128 - 1);
3827 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3828 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3829 /* Now we know the entire range is within the main primary map. */
3830 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3831 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3832 if (LIKELY(pm_off_lo == pm_off_hi)) {
3833 /* Now we know that the entire address range falls within a
3834 single secondary map, and that that secondary 'lives' in
3835 the main primary map. */
3836 SecMap* sm = get_secmap_for_writing_low(a_lo);
3837 UWord v_off16 = SM_OFF_16(a_lo);
3838 UShort* p = &sm->vabits16[v_off16];
3839 p[ 0] = VA_BITS16_UNDEFINED;
3840 p[ 1] = VA_BITS16_UNDEFINED;
3841 p[ 2] = VA_BITS16_UNDEFINED;
3842 p[ 3] = VA_BITS16_UNDEFINED;
3843 p[ 4] = VA_BITS16_UNDEFINED;
3844 p[ 5] = VA_BITS16_UNDEFINED;
3845 p[ 6] = VA_BITS16_UNDEFINED;
3846 p[ 7] = VA_BITS16_UNDEFINED;
3847 p[ 8] = VA_BITS16_UNDEFINED;
3848 p[ 9] = VA_BITS16_UNDEFINED;
3849 p[10] = VA_BITS16_UNDEFINED;
3850 p[11] = VA_BITS16_UNDEFINED;
3851 p[12] = VA_BITS16_UNDEFINED;
3852 p[13] = VA_BITS16_UNDEFINED;
3853 p[14] = VA_BITS16_UNDEFINED;
3854 p[15] = VA_BITS16_UNDEFINED;
3855 return;
3860 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3861 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3862 /* Now we know the address range is suitably sized and aligned. */
3863 UWord a_lo = (UWord)(base);
3864 UWord a_hi = (UWord)(base + 288 - 1);
3865 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3866 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3867 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3868 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3869 if (LIKELY(pm_off_lo == pm_off_hi)) {
3870 /* Now we know that the entire address range falls within a
3871 single secondary map, and that that secondary 'lives' in
3872 the main primary map. */
3873 SecMap* sm = get_secmap_for_writing_low(a_lo);
3874 UWord v_off16 = SM_OFF_16(a_lo);
3875 UShort* p = &sm->vabits16[v_off16];
3876 p[ 0] = VA_BITS16_UNDEFINED;
3877 p[ 1] = VA_BITS16_UNDEFINED;
3878 p[ 2] = VA_BITS16_UNDEFINED;
3879 p[ 3] = VA_BITS16_UNDEFINED;
3880 p[ 4] = VA_BITS16_UNDEFINED;
3881 p[ 5] = VA_BITS16_UNDEFINED;
3882 p[ 6] = VA_BITS16_UNDEFINED;
3883 p[ 7] = VA_BITS16_UNDEFINED;
3884 p[ 8] = VA_BITS16_UNDEFINED;
3885 p[ 9] = VA_BITS16_UNDEFINED;
3886 p[10] = VA_BITS16_UNDEFINED;
3887 p[11] = VA_BITS16_UNDEFINED;
3888 p[12] = VA_BITS16_UNDEFINED;
3889 p[13] = VA_BITS16_UNDEFINED;
3890 p[14] = VA_BITS16_UNDEFINED;
3891 p[15] = VA_BITS16_UNDEFINED;
3892 p[16] = VA_BITS16_UNDEFINED;
3893 p[17] = VA_BITS16_UNDEFINED;
3894 p[18] = VA_BITS16_UNDEFINED;
3895 p[19] = VA_BITS16_UNDEFINED;
3896 p[20] = VA_BITS16_UNDEFINED;
3897 p[21] = VA_BITS16_UNDEFINED;
3898 p[22] = VA_BITS16_UNDEFINED;
3899 p[23] = VA_BITS16_UNDEFINED;
3900 p[24] = VA_BITS16_UNDEFINED;
3901 p[25] = VA_BITS16_UNDEFINED;
3902 p[26] = VA_BITS16_UNDEFINED;
3903 p[27] = VA_BITS16_UNDEFINED;
3904 p[28] = VA_BITS16_UNDEFINED;
3905 p[29] = VA_BITS16_UNDEFINED;
3906 p[30] = VA_BITS16_UNDEFINED;
3907 p[31] = VA_BITS16_UNDEFINED;
3908 p[32] = VA_BITS16_UNDEFINED;
3909 p[33] = VA_BITS16_UNDEFINED;
3910 p[34] = VA_BITS16_UNDEFINED;
3911 p[35] = VA_BITS16_UNDEFINED;
3912 return;
3917 /* else fall into slow case */
3918 make_mem_undefined(base, len);
3922 /* And this is an even more specialised version, for the case where there
3923 is no origin tracking and the length is 128. */
3924 VG_REGPARM(1)
3925 void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
3927 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
3928 if (0)
3929 VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );
3931 # if 0
3932 /* Slow(ish) version, which is fairly easily seen to be correct. */
3934 if (LIKELY( VG_IS_8_ALIGNED(base) )) {
3935 make_aligned_word64_undefined(base + 0);
3936 make_aligned_word64_undefined(base + 8);
3937 make_aligned_word64_undefined(base + 16);
3938 make_aligned_word64_undefined(base + 24);
3940 make_aligned_word64_undefined(base + 32);
3941 make_aligned_word64_undefined(base + 40);
3942 make_aligned_word64_undefined(base + 48);
3943 make_aligned_word64_undefined(base + 56);
3945 make_aligned_word64_undefined(base + 64);
3946 make_aligned_word64_undefined(base + 72);
3947 make_aligned_word64_undefined(base + 80);
3948 make_aligned_word64_undefined(base + 88);
3950 make_aligned_word64_undefined(base + 96);
3951 make_aligned_word64_undefined(base + 104);
3952 make_aligned_word64_undefined(base + 112);
3953 make_aligned_word64_undefined(base + 120);
3954 } else {
3955 make_mem_undefined(base, 128);
3957 # endif
3959 /* Idea is: go fast when
3960 * 16-aligned and length is 128
3961 * the sm is available in the main primary map
3962 * the address range falls entirely within a single secondary map
3963 If all those conditions hold, just update the V+A bits by writing
3964 directly into the vabits array. (If the sm was distinguished, this
3965 will make a copy and then write to it.)
3967 Typically this applies to amd64 'ret' instructions, since RSP is
3968 16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI). */
3970 if (LIKELY( VG_IS_16_ALIGNED(base) )) {
3971 /* Now we know the address range is suitably sized and aligned. */
3972 UWord a_lo = (UWord)(base);
3973 UWord a_hi = (UWord)(base + 128 - 1);
3974 /* FIXME: come up with a sane story on the wraparound case
3975 (which of course cannot happen, but still..) */
3976 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
3977 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3978 /* Now we know the entire range is within the main primary map. */
3979 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3980 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3981 if (LIKELY(pm_off_lo == pm_off_hi)) {
3982 /* Now we know that the entire address range falls within a
3983 single secondary map, and that that secondary 'lives' in
3984 the main primary map. */
3985 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
3986 SecMap* sm = get_secmap_for_writing_low(a_lo);
3987 UWord v_off = SM_OFF(a_lo);
3988 UInt* w32 = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]);
3989 w32[ 0] = VA_BITS32_UNDEFINED;
3990 w32[ 1] = VA_BITS32_UNDEFINED;
3991 w32[ 2] = VA_BITS32_UNDEFINED;
3992 w32[ 3] = VA_BITS32_UNDEFINED;
3993 w32[ 4] = VA_BITS32_UNDEFINED;
3994 w32[ 5] = VA_BITS32_UNDEFINED;
3995 w32[ 6] = VA_BITS32_UNDEFINED;
3996 w32[ 7] = VA_BITS32_UNDEFINED;
3997 return;
4002 /* The same, but for when base is 8 % 16, which is the situation
4003 with RSP for amd64-ELF immediately after call instructions. */
4005 if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
4006 /* Now we know the address range is suitably sized and aligned. */
4007 UWord a_lo = (UWord)(base);
4008 UWord a_hi = (UWord)(base + 128 - 1);
4009 /* FIXME: come up with a sane story on the wraparound case
4010 (which of course cannot happen, but still..) */
4011 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
4012 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
4013 /* Now we know the entire range is within the main primary map. */
4014 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4015 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4016 if (LIKELY(pm_off_lo == pm_off_hi)) {
4017 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
4018 /* Now we know that the entire address range falls within a
4019 single secondary map, and that that secondary 'lives' in
4020 the main primary map. */
4021 SecMap* sm = get_secmap_for_writing_low(a_lo);
4022 UWord v_off16 = SM_OFF_16(a_lo);
4023 UShort* w16 = &sm->vabits16[v_off16];
4024 UInt* w32 = ASSUME_ALIGNED(UInt*, &w16[1]);
4025 /* The following assertion is commented out for obvious
4026 performance reasons, but was verified as valid when
4027 running the entire testsuite and also Firefox. */
4028 /* tl_assert(VG_IS_4_ALIGNED(w32)); */
4029 w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
4030 w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
4031 w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
4032 w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
4033 w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
4034 w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
4035 w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
4036 w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
4037 w16[15] = VA_BITS16_UNDEFINED; // w16[15]
4038 return;
4043 /* else fall into slow case */
4044 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
4045 make_mem_undefined(base, 128);
4049 /*------------------------------------------------------------*/
4050 /*--- Checking memory ---*/
4051 /*------------------------------------------------------------*/
4053 typedef
4054 enum {
4055 MC_Ok = 5,
4056 MC_AddrErr = 6,
4057 MC_ValueErr = 7
4059 MC_ReadResult;
4062 /* Check permissions for address range. If inadequate permissions
4063 exist, *bad_addr is set to the offending address, so the caller can
4064 know what it is. */
4066 /* Returns True if [a .. a+len) is not addressable. Otherwise,
4067 returns False, and if bad_addr is non-NULL, sets *bad_addr to
4068 indicate the lowest failing address. Functions below are
4069 similar. */
4070 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
4072 SizeT i;
4073 UWord vabits2;
4075 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
4076 for (i = 0; i < len; i++) {
4077 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
4078 vabits2 = get_vabits2(a);
4079 if (VA_BITS2_NOACCESS != vabits2) {
4080 if (bad_addr != NULL) *bad_addr = a;
4081 return False;
4083 a++;
4085 return True;
4088 static Bool is_mem_addressable ( Addr a, SizeT len,
4089 /*OUT*/Addr* bad_addr )
4091 SizeT i;
4092 UWord vabits2;
4094 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
4095 for (i = 0; i < len; i++) {
4096 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
4097 vabits2 = get_vabits2(a);
4098 if (VA_BITS2_NOACCESS == vabits2) {
4099 if (bad_addr != NULL) *bad_addr = a;
4100 return False;
4102 a++;
4104 return True;
4107 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
4108 /*OUT*/Addr* bad_addr,
4109 /*OUT*/UInt* otag )
4111 SizeT i;
4112 UWord vabits2;
4114 PROF_EVENT(MCPE_IS_MEM_DEFINED);
4115 DEBUG("is_mem_defined\n");
4117 if (otag) *otag = 0;
4118 if (bad_addr) *bad_addr = 0;
4119 for (i = 0; i < len; i++) {
4120 PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
4121 vabits2 = get_vabits2(a);
4122 if (VA_BITS2_DEFINED != vabits2) {
4123 // Error! Nb: Report addressability errors in preference to
4124 // definedness errors. And don't report definedness errors unless
4125 // --undef-value-errors=yes.
4126 if (bad_addr) {
4127 *bad_addr = a;
4129 if (VA_BITS2_NOACCESS == vabits2) {
4130 return MC_AddrErr;
4132 if (MC_(clo_mc_level) >= 2) {
4133 if (otag && MC_(clo_mc_level) == 3) {
4134 *otag = MC_(helperc_b_load1)( a );
4136 return MC_ValueErr;
4139 a++;
4141 return MC_Ok;
4145 /* Like is_mem_defined but doesn't give up at the first uninitialised
4146 byte -- the entire range is always checked. This is important for
4147 detecting errors in the case where a checked range strays into
4148 invalid memory, but that fact is not detected by the ordinary
4149 is_mem_defined(), because of an undefined section that precedes the
4150 out of range section, possibly as a result of an alignment hole in
4151 the checked data. This version always checks the entire range and
4152 can report both a definedness and an accessibility error, if
4153 necessary. (An illustrative client-side sketch follows the function.) */
4154 static void is_mem_defined_comprehensive (
4155 Addr a, SizeT len,
4156 /*OUT*/Bool* errorV, /* is there a definedness err? */
4157 /*OUT*/Addr* bad_addrV, /* if so where? */
4158 /*OUT*/UInt* otagV, /* and what's its otag? */
4159 /*OUT*/Bool* errorA, /* is there an addressability err? */
4160 /*OUT*/Addr* bad_addrA /* if so where? */
4163 SizeT i;
4164 UWord vabits2;
4165 Bool already_saw_errV = False;
4167 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
4168 DEBUG("is_mem_defined_comprehensive\n");
4170 tl_assert(!(*errorV || *errorA));
4172 for (i = 0; i < len; i++) {
4173 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
4174 vabits2 = get_vabits2(a);
4175 switch (vabits2) {
4176 case VA_BITS2_DEFINED:
4177 a++;
4178 break;
4179 case VA_BITS2_UNDEFINED:
4180 case VA_BITS2_PARTDEFINED:
4181 if (!already_saw_errV) {
4182 *errorV = True;
4183 *bad_addrV = a;
4184 if (MC_(clo_mc_level) == 3) {
4185 *otagV = MC_(helperc_b_load1)( a );
4186 } else {
4187 *otagV = 0;
4189 already_saw_errV = True;
4191 a++; /* keep going */
4192 break;
4193 case VA_BITS2_NOACCESS:
4194 *errorA = True;
4195 *bad_addrA = a;
4196 return; /* give up now. */
4197 default:
4198 tl_assert(0);
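/* A hypothetical client-side case of the kind the comprehensive check
   above exists for (illustrative application code only, not Memcheck code;
   'fd' and the placement of 's' are assumptions): */
#if 0
   /* Suppose 's' points at a struct sitting right at the end of a mapping. */
   struct { char tag; /* + 7 bytes of padding */ long val; } *s = ...;
   s->tag = 1;
   s->val = 2;                        /* the padding stays undefined */
   write(fd, s, sizeof(*s) + 64);     /* runs off the end of the mapping */
   /* is_mem_defined() gives up at the first undefined padding byte and
      reports only a definedness error; is_mem_defined_comprehensive()
      keeps scanning and also reports the addressability error beyond the
      end of the mapping. */
#endif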
4204 /* Check a zero-terminated ASCII string. Tricky -- we don't want to
4205 examine the actual bytes to find the end until we're sure it is
4206 safe to do so. */
4208 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
4210 UWord vabits2;
4212 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
4213 DEBUG("mc_is_defined_asciiz\n");
4215 if (otag) *otag = 0;
4216 if (bad_addr) *bad_addr = 0;
4217 while (True) {
4218 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
4219 vabits2 = get_vabits2(a);
4220 if (VA_BITS2_DEFINED != vabits2) {
4221 // Error! Nb: Report addressability errors in preference to
4222 // definedness errors. And don't report definedness errors unless
4223 // --undef-value-errors=yes.
4224 if (bad_addr) {
4225 *bad_addr = a;
4227 if (VA_BITS2_NOACCESS == vabits2) {
4228 return MC_AddrErr;
4230 if (MC_(clo_mc_level) >= 2) {
4231 if (otag && MC_(clo_mc_level) == 3) {
4232 *otag = MC_(helperc_b_load1)( a );
4234 return MC_ValueErr;
4237 /* Ok, a is safe to read. */
4238 if (* ((UChar*)a) == 0) {
4239 return MC_Ok;
4241 a++;
4246 /*------------------------------------------------------------*/
4247 /*--- Memory event handlers ---*/
4248 /*------------------------------------------------------------*/
4250 static
4251 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
4252 Addr base, SizeT size )
4254 Addr bad_addr;
4255 Bool ok = is_mem_addressable ( base, size, &bad_addr );
4257 if (!ok) {
4258 switch (part) {
4259 case Vg_CoreSysCall:
4260 MC_(record_memparam_error) ( tid, bad_addr,
4261 /*isAddrErr*/True, s, 0/*otag*/ );
4262 break;
4264 case Vg_CoreSignal:
4265 MC_(record_core_mem_error)( tid, s );
4266 break;
4268 default:
4269 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
4274 static
4275 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
4276 Addr base, SizeT size )
4278 UInt otag = 0;
4279 Addr bad_addr;
4280 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
4282 if (MC_Ok != res) {
4283 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4285 switch (part) {
4286 case Vg_CoreSysCall:
4287 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4288 isAddrErr ? 0 : otag );
4289 break;
4291 case Vg_CoreSysCallArgInMem:
4292 MC_(record_regparam_error) ( tid, s, otag );
4293 break;
4295 /* If we're being asked to jump to a silly address, record an error
4296 message before potentially crashing the entire system. */
4297 case Vg_CoreTranslate:
4298 MC_(record_jump_error)( tid, bad_addr );
4299 break;
4301 default:
4302 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
4307 static
4308 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
4309 const HChar* s, Addr str )
4311 MC_ReadResult res;
4312 Addr bad_addr = 0; // shut GCC up
4313 UInt otag = 0;
4315 tl_assert(part == Vg_CoreSysCall);
4316 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
4317 if (MC_Ok != res) {
4318 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4319 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4320 isAddrErr ? 0 : otag );
4324 /* Handling of mmap and mprotect is not as simple as it seems.
4326 The underlying semantics are that memory obtained from mmap is
4327 always initialised, but may be inaccessible. And changes to the
4328 protection of memory do not change its contents and hence not its
4329 definedness state. Problem is we can't model
4330 inaccessible-but-with-some-definedness state; once we mark memory
4331 as inaccessible we lose all info about definedness, and so can't
4332 restore that if it is later made accessible again.
4334 One obvious thing to do is this:
4336 mmap/mprotect NONE -> noaccess
4337 mmap/mprotect other -> defined
4339 The problem case here is: taking accessible memory, writing
4340 uninitialised data to it, mprotecting it NONE and later mprotecting
4341 it back to some accessible state causes the undefinedness to be
4342 lost.
4344 A better proposal is:
4346 (1) mmap NONE -> make noaccess
4347 (2) mmap other -> make defined
4349 (3) mprotect NONE -> # no change
4350 (4) mprotect other -> change any "noaccess" to "defined"
4352 (2) is OK because memory newly obtained from mmap really is defined
4353 (zeroed out by the kernel -- doing anything else would
4354 constitute a massive security hole.)
4356 (1) is OK because the only way to make the memory usable is via
4357 (4), in which case we also wind up correctly marking it all as
4358 defined.
4360 (3) is the weak case. We choose not to change memory state.
4361 (presumably the range is in some mixture of "defined" and
4362 "undefined", viz, accessible but with arbitrary V bits). Doing
4363 nothing means we retain the V bits, so that if the memory is
4364 later mprotected "other", the V bits remain unchanged, so there
4365 can be no false negatives. The bad effect is that if there's
4366 an access in the area, then MC cannot warn; but at least we'll
4367 get a SEGV to show, so it's better than nothing.
4369 Consider the sequence (3) followed by (4). Any memory that was
4370 "defined" or "undefined" previously retains its state (as
4371 required). Any memory that was "noaccess" before can only have
4372 been made that way by (1), and so it's OK to change it to
4373 "defined".
4375 See https://bugs.kde.org/show_bug.cgi?id=205541
4376 and https://bugs.kde.org/show_bug.cgi?id=210268 */
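/* A hypothetical client-side sequence illustrating the weak case (3)
   above (plain POSIX calls in ordinary application code, shown purely as
   a sketch): */
#if 0
   char* p = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
                  MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);  /* (2): defined        */
   char  uninit[64];
   memcpy(p, uninit, sizeof(uninit));                 /* p[0..63] undefined  */
   mprotect(p, 4096, PROT_NONE);                      /* (3): V bits kept    */
   mprotect(p, 4096, PROT_READ);                      /* (4): only noaccess
                                                         becomes defined     */
   if (p[0] == 'x') { }                               /* still reported as a
                                                         use of uninitialised
                                                         data -- no false
                                                         negative            */
#endif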
4378 static
4379 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4380 ULong di_handle )
4382 if (rr || ww || xx) {
4383 /* (2) mmap/mprotect other -> defined */
4384 MC_(make_mem_defined)(a, len);
4385 } else {
4386 /* (1) mmap/mprotect NONE -> noaccess */
4387 MC_(make_mem_noaccess)(a, len);
4391 static
4392 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4394 if (rr || ww || xx) {
4395 /* (4) mprotect other -> change any "noaccess" to "defined" */
4396 make_mem_defined_if_noaccess(a, len);
4397 } else {
4398 /* (3) mprotect NONE -> # no change */
4399 /* do nothing */
4404 static
4405 void mc_new_mem_startup( Addr a, SizeT len,
4406 Bool rr, Bool ww, Bool xx, ULong di_handle )
4408 // Because code is defined, initialised variables get put in the data
4409 // segment and are defined, and uninitialised variables get put in the
4410 // bss segment and are auto-zeroed (and so defined).
4412 // It's possible that there will be padding between global variables.
4413 // This will also be auto-zeroed, and marked as defined by Memcheck. If
4414 // a program uses it, Memcheck will not complain. This is arguably a
4415 // false negative, but it's a grey area -- the behaviour is defined (the
4416 // padding is zeroed) but it's probably not what the user intended. And
4417 // we can't avoid it.
4419 // Note: we generally ignore RWX permissions, because we can't track them
4420 // without requiring more than one A bit which would slow things down a
4421 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
4422 // So we mark any such pages as "unaddressable".
4423 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4424 a, (ULong)len, rr, ww, xx);
4425 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
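   // A hypothetical illustration of the padding point above (ordinary
   // client code, not Memcheck code; the layout is an assumption):
#if 0
   char g1 = 'a';   /* typically followed by padding up to g2's alignment */
   long g2 = 42;
   /* Reading the padding bytes between g1 and g2 (say, by memcmp-ing the
      whole region) is questionable, but goes unreported: the loader zeroed
      the padding, so Memcheck sees it as addressable and defined. */
#endif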
4428 static
4429 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4431 MC_(make_mem_defined)(a, len);
4435 /*------------------------------------------------------------*/
4436 /*--- Register event handlers ---*/
4437 /*------------------------------------------------------------*/
4439 /* Try and get a nonzero origin for the guest state section of thread
4440 tid characterised by (offset,size). Return 0 if nothing to show
4441 for it. */
4442 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4443 Int offset, SizeT size )
4445 Int sh2off;
4446 UInt area[3];
4447 UInt otag;
4448 sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4449 if (sh2off == -1)
4450 return 0; /* This piece of guest state is not tracked */
4451 tl_assert(sh2off >= 0);
4452 tl_assert(0 == (sh2off % 4));
4453 area[0] = 0x31313131;
4454 area[2] = 0x27272727;
4455 VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4456 tl_assert(area[0] == 0x31313131);
4457 tl_assert(area[2] == 0x27272727);
4458 otag = area[1];
4459 return otag;
4463 /* When some chunk of guest state is written, mark the corresponding
4464 shadow area as valid. This is used to initialise arbitrarily large
4465 chunks of guest state, hence the _SIZE value, which has to be as
4466 big as the biggest guest state. */
4468 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4469 PtrdiffT offset, SizeT size)
4471 # define MAX_REG_WRITE_SIZE 1744
4472 UChar area[MAX_REG_WRITE_SIZE];
4473 tl_assert(size <= MAX_REG_WRITE_SIZE);
4474 VG_(memset)(area, V_BITS8_DEFINED, size);
4475 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4476 # undef MAX_REG_WRITE_SIZE
4479 static
4480 void mc_post_reg_write_clientcall ( ThreadId tid,
4481 PtrdiffT offset, SizeT size, Addr f)
4483 mc_post_reg_write(/*dummy*/0, tid, offset, size);
4486 /* Look at the definedness of the guest's shadow state for
4487 [offset, offset+len). If any part of that is undefined, record
4488 a parameter error. */
4490 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4491 PtrdiffT offset, SizeT size)
4493 Int i;
4494 Bool bad;
4495 UInt otag;
4497 UChar area[16];
4498 tl_assert(size <= 16);
4500 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4502 bad = False;
4503 for (i = 0; i < size; i++) {
4504 if (area[i] != V_BITS8_DEFINED) {
4505 bad = True;
4506 break;
4510 if (!bad)
4511 return;
4513 /* We've found some undefinedness. See if we can also find an
4514 origin for it. */
4515 otag = mb_get_origin_for_guest_offset( tid, offset, size );
4516 MC_(record_regparam_error) ( tid, s, otag );
4520 /*------------------------------------------------------------*/
4521 /*--- Register-memory event handlers ---*/
4522 /*------------------------------------------------------------*/
4524 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
4525 PtrdiffT guest_state_offset, SizeT size )
4527 SizeT i;
4528 UChar vbits8;
4529 Int offset;
4530 UInt d32;
4532 /* Slow loop. */
4533 for (i = 0; i < size; i++) {
4534 get_vbits8( a+i, &vbits8 );
4535 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
4536 1, &vbits8 );
4539 if (MC_(clo_mc_level) != 3)
4540 return;
4542 /* Track origins. */
4543 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4544 if (offset == -1)
4545 return;
4547 switch (size) {
4548 case 1:
4549 d32 = MC_(helperc_b_load1)( a );
4550 break;
4551 case 2:
4552 d32 = MC_(helperc_b_load2)( a );
4553 break;
4554 case 4:
4555 d32 = MC_(helperc_b_load4)( a );
4556 break;
4557 case 8:
4558 d32 = MC_(helperc_b_load8)( a );
4559 break;
4560 case 16:
4561 d32 = MC_(helperc_b_load16)( a );
4562 break;
4563 case 32:
4564 d32 = MC_(helperc_b_load32)( a );
4565 break;
4566 default:
4567 tl_assert(0);
4570 VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
4573 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
4574 PtrdiffT guest_state_offset, Addr a,
4575 SizeT size )
4577 SizeT i;
4578 UChar vbits8;
4579 Int offset;
4580 UInt d32;
4582 /* Slow loop. */
4583 for (i = 0; i < size; i++) {
4584 VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
4585 guest_state_offset+i, 1 );
4586 set_vbits8( a+i, vbits8 );
4589 if (MC_(clo_mc_level) != 3)
4590 return;
4592 /* Track origins. */
4593 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4594 if (offset == -1)
4595 return;
4597 VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
4598 switch (size) {
4599 case 1:
4600 MC_(helperc_b_store1)( a, d32 );
4601 break;
4602 case 2:
4603 MC_(helperc_b_store2)( a, d32 );
4604 break;
4605 case 4:
4606 MC_(helperc_b_store4)( a, d32 );
4607 break;
4608 case 8:
4609 MC_(helperc_b_store8)( a, d32 );
4610 break;
4611 case 16:
4612 MC_(helperc_b_store16)( a, d32 );
4613 break;
4614 case 32:
4615 MC_(helperc_b_store32)( a, d32 );
4616 break;
4617 default:
4618 tl_assert(0);
4623 /*------------------------------------------------------------*/
4624 /*--- Some static assertions ---*/
4625 /*------------------------------------------------------------*/
4627 /* The handwritten assembly helpers below have baked-in assumptions
4628 about various constant values. These assertions attempt to make
4629 that a bit safer by checking those values and flagging changes that
4630 would make the assembly invalid. Not perfect but it's better than
4631 nothing. */
4633 STATIC_ASSERT(SM_CHUNKS * 4 == 65536);
4635 STATIC_ASSERT(VA_BITS8_DEFINED == 0xAA);
4636 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);
4638 STATIC_ASSERT(V_BITS32_DEFINED == 0x00000000);
4639 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);
4641 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA);
4642 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);
4644 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000);
4645 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);
4647 STATIC_ASSERT(VA_BITS2_DEFINED == 2);
4648 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);
4650 STATIC_ASSERT(V_BITS8_DEFINED == 0x00);
4651 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
4654 /*------------------------------------------------------------*/
4655 /*--- Functions called directly from generated code: ---*/
4656 /*--- Load/store handlers. ---*/
4657 /*------------------------------------------------------------*/
4659 /* Types: LOADV32, LOADV16, LOADV8 are:
4660 UWord fn ( Addr a )
4661 so they return 32 bits on 32-bit machines and 64 bits on
4662 64-bit machines. Addr has the same size as a host word.
4664 LOADV64 is always ULong fn ( Addr a )
4666 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4667 are a UWord, and for STOREV64 they are a ULong.
4670 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4671 naturally '_sz/8'-aligned, or it exceeds the range covered by the
4672 primary map. This is all very tricky (and important!), so let's
4673 work through the maths by hand (below), *and* assert for these
4674 values at startup. */
4675 #define MASK(_szInBytes) \
4676 ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4678 /* MASK only exists so as to define this macro. */
4679 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
4680 ((_a) & MASK((_szInBits>>3)))
4682 /* On a 32-bit machine:
4684 N_PRIMARY_BITS == 16, so
4685 N_PRIMARY_MAP == 0x10000, so
4686 N_PRIMARY_MAP-1 == 0xFFFF, so
4687 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4689 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4690 = ~ ( 0xFFFF | 0xFFFF0000 )
4691 = ~ 0xFFFF'FFFF
4694 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4695 = ~ ( 0xFFFE | 0xFFFF0000 )
4696 = ~ 0xFFFF'FFFE
4699 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4700 = ~ ( 0xFFFC | 0xFFFF0000 )
4701 = ~ 0xFFFF'FFFC
4704 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4705 = ~ ( 0xFFF8 | 0xFFFF0000 )
4706 = ~ 0xFFFF'FFF8
4709 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4710 precisely when a is not 1/2/4/8-byte aligned. And obviously, for
4711 the 1-byte alignment case, it is always a zero value, since MASK(1)
4712 is zero. All as expected.
4714 On a 64-bit machine, it's more complex, since we're testing
4715 simultaneously for misalignment and for the address being at or
4716 above 64G:
4718 N_PRIMARY_BITS == 20, so
4719 N_PRIMARY_MAP == 0x100000, so
4720 N_PRIMARY_MAP-1 == 0xFFFFF, so
4721 (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4723 MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4724 = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4725 = ~ 0xF'FFFF'FFFF
4726 = 0xFFFF'FFF0'0000'0000
4728 MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4729 = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4730 = ~ 0xF'FFFF'FFFE
4731 = 0xFFFF'FFF0'0000'0001
4733 MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4734 = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4735 = ~ 0xF'FFFF'FFFC
4736 = 0xFFFF'FFF0'0000'0003
4738 MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4739 = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4740 = ~ 0xF'FFFF'FFF8
4741 = 0xFFFF'FFF0'0000'0007
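As a quick illustrative check of the 64-bit case (just plugging
numbers into the formulas above, not part of the original
derivation): for an 8-byte access, MASK(8) == 0xFFFF'FFF0'0000'0007,
so

0x5000'0008 & MASK(8) == 0 (fast path: 8-aligned, below 64G)
0x5000'0004 & MASK(8) == 0x4 (slow path: not 8-aligned)
0x10'5000'0000 & MASK(8) == 0x10'0000'0000 (slow path: at or above 64G)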
4744 /*------------------------------------------------------------*/
4745 /*--- LOADV256 and LOADV128 ---*/
4746 /*------------------------------------------------------------*/
4748 static INLINE
4749 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4750 Addr a, SizeT nBits, Bool isBigEndian )
4752 PROF_EVENT(MCPE_LOADV_128_OR_256);
4754 #ifndef PERF_FAST_LOADV
4755 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4756 return;
4757 #else
4759 UWord sm_off16, vabits16, j;
4760 UWord nBytes = nBits / 8;
4761 UWord nULongs = nBytes / 8;
4762 SecMap* sm;
4764 if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4765 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
4766 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4767 return;
4770 /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4771 suitably aligned, is mapped, and addressable. */
4772 for (j = 0; j < nULongs; j++) {
4773 sm = get_secmap_for_reading_low(a + 8*j);
4774 sm_off16 = SM_OFF_16(a + 8*j);
4775 vabits16 = sm->vabits16[sm_off16];
4777 // Convert V bits from compact memory form to expanded
4778 // register form.
4779 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4780 res[j] = V_BITS64_DEFINED;
4781 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4782 res[j] = V_BITS64_UNDEFINED;
4783 } else {
4784 /* Slow case: some block of 8 bytes is not all-defined or
4785 all-undefined. */
4786 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
4787 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4788 return;
4791 return;
4793 #endif
4796 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4798 mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4800 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4802 mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4805 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4807 mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4809 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4811 mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4814 /*------------------------------------------------------------*/
4815 /*--- LOADV64 ---*/
4816 /*------------------------------------------------------------*/
4818 static INLINE
4819 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4821 PROF_EVENT(MCPE_LOADV64);
4823 #ifndef PERF_FAST_LOADV
4824 return mc_LOADVn_slow( a, 64, isBigEndian );
4825 #else
4827 UWord sm_off16, vabits16;
4828 SecMap* sm;
4830 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4831 PROF_EVENT(MCPE_LOADV64_SLOW1);
4832 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4835 sm = get_secmap_for_reading_low(a);
4836 sm_off16 = SM_OFF_16(a);
4837 vabits16 = sm->vabits16[sm_off16];
4839 // Handle common case quickly: a is suitably aligned, is mapped, and
4840 // addressable.
4841 // Convert V bits from compact memory form to expanded register form.
4842 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4843 return V_BITS64_DEFINED;
4844 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4845 return V_BITS64_UNDEFINED;
4846 } else {
4847 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4848 PROF_EVENT(MCPE_LOADV64_SLOW2);
4849 return mc_LOADVn_slow( a, 64, isBigEndian );
4852 #endif
4855 // Generic for all platforms
4856 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4858 return mc_LOADV64(a, True);
4861 // Non-generic assembly for arm32-linux
4862 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4863 && defined(VGP_arm_linux)
4864 __asm__( /* Derived from the 32 bit assembly helper */
4865 ".text \n"
4866 ".align 2 \n"
4867 ".global vgMemCheck_helperc_LOADV64le \n"
4868 ".type vgMemCheck_helperc_LOADV64le, %function \n"
4869 "vgMemCheck_helperc_LOADV64le: \n"
4870 " tst r0, #7 \n"
4871 " movw r3, #:lower16:primary_map \n"
4872 " bne .LLV64LEc4 \n" // if misaligned
4873 " lsr r2, r0, #16 \n"
4874 " movt r3, #:upper16:primary_map \n"
4875 " ldr r2, [r3, r2, lsl #2] \n"
4876 " uxth r1, r0 \n" // r1 is 0-(16)-0 X-(13)-X 000
4877 " movw r3, #0xAAAA \n"
4878 " lsr r1, r1, #2 \n" // r1 is 0-(16)-0 00 X-(13)-X 0
4879 " ldrh r1, [r2, r1] \n"
4880 " cmp r1, r3 \n" // 0xAAAA == VA_BITS16_DEFINED
4881 " bne .LLV64LEc0 \n" // if !all_defined
4882 " mov r1, #0x0 \n" // 0x0 == V_BITS32_DEFINED
4883 " mov r0, #0x0 \n" // 0x0 == V_BITS32_DEFINED
4884 " bx lr \n"
4885 ".LLV64LEc0: \n"
4886 " movw r3, #0x5555 \n"
4887 " cmp r1, r3 \n" // 0x5555 == VA_BITS16_UNDEFINED
4888 " bne .LLV64LEc4 \n" // if !all_undefined
4889 " mov r1, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4890 " mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4891 " bx lr \n"
4892 ".LLV64LEc4: \n"
4893 " push {r4, lr} \n"
4894 " mov r2, #0 \n"
4895 " mov r1, #64 \n"
4896 " bl mc_LOADVn_slow \n"
4897 " pop {r4, pc} \n"
4898 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
4899 ".previous\n"
4902 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4903 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
4904 __asm__(
4905 ".text\n"
4906 ".align 16\n"
4907 ".global vgMemCheck_helperc_LOADV64le\n"
4908 ".type vgMemCheck_helperc_LOADV64le, @function\n"
4909 "vgMemCheck_helperc_LOADV64le:\n"
4910 " test $0x7, %eax\n"
4911 " jne .LLV64LE2\n" /* jump if not aligned */
4912 " mov %eax, %ecx\n"
4913 " movzwl %ax, %edx\n"
4914 " shr $0x10, %ecx\n"
4915 " mov primary_map(,%ecx,4), %ecx\n"
4916 " shr $0x3, %edx\n"
4917 " movzwl (%ecx,%edx,2), %edx\n"
4918 " cmp $0xaaaa, %edx\n"
4919 " jne .LLV64LE1\n" /* jump if not all defined */
4920 " xor %eax, %eax\n" /* return 0 in edx:eax */
4921 " xor %edx, %edx\n"
4922 " ret\n"
4923 ".LLV64LE1:\n"
4924 " cmp $0x5555, %edx\n"
4925 " jne .LLV64LE2\n" /* jump if not all undefined */
4926 " or $0xffffffff, %eax\n" /* else return all bits set in edx:eax */
4927 " or $0xffffffff, %edx\n"
4928 " ret\n"
4929 ".LLV64LE2:\n"
4930 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 64, 0) */
4931 " mov $64, %edx\n"
4932 " jmp mc_LOADVn_slow\n"
4933 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
4934 ".previous\n"
4937 #else
4938 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
4939 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4941 return mc_LOADV64(a, False);
4943 #endif
4945 /*------------------------------------------------------------*/
4946 /*--- STOREV64 ---*/
4947 /*------------------------------------------------------------*/
4949 static INLINE
4950 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4952 PROF_EVENT(MCPE_STOREV64);
4954 #ifndef PERF_FAST_STOREV
4955 // XXX: this slow case seems to be marginally faster than the fast case!
4956 // Investigate further.
4957 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4958 #else
4960 UWord sm_off16, vabits16;
4961 SecMap* sm;
4963 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4964 PROF_EVENT(MCPE_STOREV64_SLOW1);
4965 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4966 return;
4969 sm = get_secmap_for_reading_low(a);
4970 sm_off16 = SM_OFF_16(a);
4971 vabits16 = sm->vabits16[sm_off16];
4973 // To understand the below cleverness, see the extensive comments
4974 // in MC_(helperc_STOREV8).
4975 if (LIKELY(V_BITS64_DEFINED == vbits64)) {
4976 if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
4977 return;
4979 if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
4980 sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
4981 return;
4983 PROF_EVENT(MCPE_STOREV64_SLOW2);
4984 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4985 return;
4987 if (V_BITS64_UNDEFINED == vbits64) {
4988 if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
4989 return;
4991 if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
4992 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
4993 return;
4995 PROF_EVENT(MCPE_STOREV64_SLOW3);
4996 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4997 return;
5000 PROF_EVENT(MCPE_STOREV64_SLOW4);
5001 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5003 #endif
5006 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
5008 mc_STOREV64(a, vbits64, True);
5010 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
5012 mc_STOREV64(a, vbits64, False);
5015 /*------------------------------------------------------------*/
5016 /*--- LOADV32 ---*/
5017 /*------------------------------------------------------------*/
5019 static INLINE
5020 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
5022 PROF_EVENT(MCPE_LOADV32);
5024 #ifndef PERF_FAST_LOADV
5025 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5026 #else
5028 UWord sm_off, vabits8;
5029 SecMap* sm;
5031 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5032 PROF_EVENT(MCPE_LOADV32_SLOW1);
5033 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5036 sm = get_secmap_for_reading_low(a);
5037 sm_off = SM_OFF(a);
5038 vabits8 = sm->vabits8[sm_off];
5040 // Handle common case quickly: a is suitably aligned, is mapped, and the
5041 // entire word32 it lives in is addressable.
5042 // Convert V bits from compact memory form to expanded register form.
5043 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
5044 // Almost certainly not necessary, but be paranoid.
5045 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5046 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
5047 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
5048 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
5049 } else {
5050 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
5051 PROF_EVENT(MCPE_LOADV32_SLOW2);
5052 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5055 #endif
5058 // Generic for all platforms
5059 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
5061 return mc_LOADV32(a, True);
5064 // Non-generic assembly for arm32-linux
5065 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5066 && defined(VGP_arm_linux)
5067 __asm__( /* Derived from NCode template */
5068 ".text \n"
5069 ".align 2 \n"
5070 ".global vgMemCheck_helperc_LOADV32le \n"
5071 ".type vgMemCheck_helperc_LOADV32le, %function \n"
5072 "vgMemCheck_helperc_LOADV32le: \n"
5073 " tst r0, #3 \n" // 1
5074 " movw r3, #:lower16:primary_map \n" // 1
5075 " bne .LLV32LEc4 \n" // 2 if misaligned
5076 " lsr r2, r0, #16 \n" // 3
5077 " movt r3, #:upper16:primary_map \n" // 3
5078 " ldr r2, [r3, r2, lsl #2] \n" // 4
5079 " uxth r1, r0 \n" // 4
5080 " ldrb r1, [r2, r1, lsr #2] \n" // 5
5081 " cmp r1, #0xAA \n" // 6 0xAA == VA_BITS8_DEFINED
5082 " bne .LLV32LEc0 \n" // 7 if !all_defined
5083 " mov r0, #0x0 \n" // 8 0x0 == V_BITS32_DEFINED
5084 " bx lr \n" // 9
5085 ".LLV32LEc0: \n"
5086 " cmp r1, #0x55 \n" // 0x55 == VA_BITS8_UNDEFINED
5087 " bne .LLV32LEc4 \n" // if !all_undefined
5088 " mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
5089 " bx lr \n"
5090 ".LLV32LEc4: \n"
5091 " push {r4, lr} \n"
5092 " mov r2, #0 \n"
5093 " mov r1, #32 \n"
5094 " bl mc_LOADVn_slow \n"
5095 " pop {r4, pc} \n"
5096 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
5097 ".previous\n"
5100 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5101 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5102 __asm__(
5103 ".text\n"
5104 ".align 16\n"
5105 ".global vgMemCheck_helperc_LOADV32le\n"
5106 ".type vgMemCheck_helperc_LOADV32le, @function\n"
5107 "vgMemCheck_helperc_LOADV32le:\n"
5108 " test $0x3, %eax\n"
5109 " jnz .LLV32LE2\n" /* jump if misaligned */
5110 " mov %eax, %edx\n"
5111 " shr $16, %edx\n"
5112 " mov primary_map(,%edx,4), %ecx\n"
5113 " movzwl %ax, %edx\n"
5114 " shr $2, %edx\n"
5115 " movzbl (%ecx,%edx,1), %edx\n"
5116 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5117 " jne .LLV32LE1\n" /* jump if not completely defined */
5118 " xor %eax, %eax\n" /* else return V_BITS32_DEFINED */
5119 " ret\n"
5120 ".LLV32LE1:\n"
5121 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5122 " jne .LLV32LE2\n" /* jump if not completely undefined */
5123 " or $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */
5124 " ret\n"
5125 ".LLV32LE2:\n"
5126 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 32, 0) */
5127 " mov $32, %edx\n"
5128 " jmp mc_LOADVn_slow\n"
5129 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
5130 ".previous\n"
5133 #else
5134 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5135 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
5137 return mc_LOADV32(a, False);
5139 #endif
5141 /*------------------------------------------------------------*/
5142 /*--- STOREV32 ---*/
5143 /*------------------------------------------------------------*/
5145 static INLINE
5146 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
5148 PROF_EVENT(MCPE_STOREV32);
5150 #ifndef PERF_FAST_STOREV
5151 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5152 #else
5154 UWord sm_off, vabits8;
5155 SecMap* sm;
5157 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5158 PROF_EVENT(MCPE_STOREV32_SLOW1);
5159 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5160 return;
5163 sm = get_secmap_for_reading_low(a);
5164 sm_off = SM_OFF(a);
5165 vabits8 = sm->vabits8[sm_off];
5167 // To understand the below cleverness, see the extensive comments
5168 // in MC_(helperc_STOREV8).
5169 if (LIKELY(V_BITS32_DEFINED == vbits32)) {
5170 if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
5171 return;
5173 if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
5174 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
5175 return;
5177 PROF_EVENT(MCPE_STOREV32_SLOW2);
5178 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5179 return;
5181 if (V_BITS32_UNDEFINED == vbits32) {
5182 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
5183 return;
5185 if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
5186 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
5187 return;
5189 PROF_EVENT(MCPE_STOREV32_SLOW3);
5190 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5191 return;
5194 PROF_EVENT(MCPE_STOREV32_SLOW4);
5195 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5197 #endif
5200 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
5202 mc_STOREV32(a, vbits32, True);
5204 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
5206 mc_STOREV32(a, vbits32, False);
5209 /*------------------------------------------------------------*/
5210 /*--- LOADV16 ---*/
5211 /*------------------------------------------------------------*/
5213 static INLINE
5214 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
5216 PROF_EVENT(MCPE_LOADV16);
5218 #ifndef PERF_FAST_LOADV
5219 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5220 #else
5222 UWord sm_off, vabits8;
5223 SecMap* sm;
5225 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5226 PROF_EVENT(MCPE_LOADV16_SLOW1);
5227 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5230 sm = get_secmap_for_reading_low(a);
5231 sm_off = SM_OFF(a);
5232 vabits8 = sm->vabits8[sm_off];
5233 // Handle common case quickly: a is suitably aligned, is mapped, and is
5234 // addressable.
5235 // Convert V bits from compact memory form to expanded register form
5236 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; }
5237 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
5238 else {
5239 // The 4 (yes, 4) bytes are not all-defined or all-undefined; check
5240 // the two sub-bytes.
5241 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
5242 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
5243 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
5244 else {
5245 /* Slow case: the two bytes are not all-defined or all-undefined. */
5246 PROF_EVENT(MCPE_LOADV16_SLOW2);
5247 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5251 #endif
5254 // Generic for all platforms
5255 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
5257 return mc_LOADV16(a, True);
5260 // Non-generic assembly for arm32-linux
5261 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5262 && defined(VGP_arm_linux)
5263 __asm__( /* Derived from NCode template */
5264 ".text \n"
5265 ".align 2 \n"
5266 ".global vgMemCheck_helperc_LOADV16le \n"
5267 ".type vgMemCheck_helperc_LOADV16le, %function \n"
5268 "vgMemCheck_helperc_LOADV16le: \n" //
5269 " tst r0, #1 \n" //
5270 " bne .LLV16LEc12 \n" // if misaligned
5271 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5272 " movw r3, #:lower16:primary_map \n" //
5273 " uxth r1, r0 \n" // r1 = sec-map-offB
5274 " movt r3, #:upper16:primary_map \n" //
5275 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5276 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5277 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5278 " bne .LLV16LEc0 \n" // no, goto .LLV16LEc0
5279 ".LLV16LEh9: \n" //
5280 " mov r0, #0xFFFFFFFF \n" //
5281 " lsl r0, r0, #16 \n" // V_BITS16_DEFINED | top16safe
5282 " bx lr \n" //
5283 ".LLV16LEc0: \n" //
5284 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5285 " bne .LLV16LEc4 \n" //
5286 ".LLV16LEc2: \n" //
5287 " mov r0, #0xFFFFFFFF \n" // V_BITS16_UNDEFINED | top16safe
5288 " bx lr \n" //
5289 ".LLV16LEc4: \n" //
5290 // r1 holds sec-map-VABITS8. r0 holds the address and is 2-aligned.
5291 // Extract the relevant 4 bits and inspect.
5292 " and r2, r0, #2 \n" // addr & 2
5293 " add r2, r2, r2 \n" // 2 * (addr & 2)
5294 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 2))
5295 " and r1, r1, #15 \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
5297 " cmp r1, #0xA \n" // VA_BITS4_DEFINED
5298 " beq .LLV16LEh9 \n" //
5300 " cmp r1, #0x5 \n" // VA_BITS4_UNDEFINED
5301 " beq .LLV16LEc2 \n" //
5303 ".LLV16LEc12: \n" //
5304 " push {r4, lr} \n" //
5305 " mov r2, #0 \n" //
5306 " mov r1, #16 \n" //
5307 " bl mc_LOADVn_slow \n" //
5308 " pop {r4, pc} \n" //
5309 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5310 ".previous\n"
5313 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5314 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5315 __asm__(
5316 ".text\n"
5317 ".align 16\n"
5318 ".global vgMemCheck_helperc_LOADV16le\n"
5319 ".type vgMemCheck_helperc_LOADV16le, @function\n"
5320 "vgMemCheck_helperc_LOADV16le:\n"
5321 " test $0x1, %eax\n"
5322 " jne .LLV16LE5\n" /* jump if not aligned */
5323 " mov %eax, %edx\n"
5324 " shr $0x10, %edx\n"
5325 " mov primary_map(,%edx,4), %ecx\n"
5326 " movzwl %ax, %edx\n"
5327 " shr $0x2, %edx\n"
5328 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5329 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5330 " jne .LLV16LE2\n" /* jump if not all 32bits defined */
5331 ".LLV16LE1:\n"
5332 " mov $0xffff0000,%eax\n" /* V_BITS16_DEFINED | top16safe */
5333 " ret\n"
5334 ".LLV16LE2:\n"
5335 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5336 " jne .LLV16LE4\n" /* jump if not all 32bits undefined */
5337 ".LLV16LE3:\n"
5338 " or $0xffffffff,%eax\n" /* V_BITS16_UNDEFINED | top16safe */
5339 " ret\n"
5340 ".LLV16LE4:\n"
5341 " mov %eax, %ecx\n"
5342 " and $0x2, %ecx\n"
5343 " add %ecx, %ecx\n"
5344 " sar %cl, %edx\n"
5345 " and $0xf, %edx\n"
5346 " cmp $0xa, %edx\n"
5347 " je .LLV16LE1\n" /* jump if all 16bits are defined */
5348 " cmp $0x5, %edx\n"
5349 " je .LLV16LE3\n" /* jump if all 16bits are undefined */
5350 ".LLV16LE5:\n"
5351 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 16, 0) */
5352 " mov $16, %edx\n"
5353 " jmp mc_LOADVn_slow\n"
5354 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5355 ".previous\n"
5358 #else
5359 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5360 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
5362 return mc_LOADV16(a, False);
5364 #endif
5366 /*------------------------------------------------------------*/
5367 /*--- STOREV16 ---*/
5368 /*------------------------------------------------------------*/
5370 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
5371 static INLINE
5372 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
5374 UInt shift;
5375 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
5376 shift = (a & 2) << 1; // shift by 0 or 4
5377 vabits8 >>= shift; // shift the four bits to the bottom
5378 // check 2 x vabits2 != VA_BITS2_NOACCESS
5379 return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
5380 && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
5383 static INLINE
5384 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
5386 PROF_EVENT(MCPE_STOREV16);
5388 #ifndef PERF_FAST_STOREV
5389 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5390 #else
5392 UWord sm_off, vabits8;
5393 SecMap* sm;
5395 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5396 PROF_EVENT(MCPE_STOREV16_SLOW1);
5397 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5398 return;
5401 sm = get_secmap_for_reading_low(a);
5402 sm_off = SM_OFF(a);
5403 vabits8 = sm->vabits8[sm_off];
5405 // To understand the below cleverness, see the extensive comments
5406 // in MC_(helperc_STOREV8).
5407 if (LIKELY(V_BITS16_DEFINED == vbits16)) {
5408 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5409 return;
5411 if (!is_distinguished_sm(sm)
5412 && accessible_vabits4_in_vabits8(a, vabits8)) {
5413 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
5414 &(sm->vabits8[sm_off]) );
5415 return;
5417 PROF_EVENT(MCPE_STOREV16_SLOW2);
5418 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5420 if (V_BITS16_UNDEFINED == vbits16) {
5421 if (vabits8 == VA_BITS8_UNDEFINED) {
5422 return;
5424 if (!is_distinguished_sm(sm)
5425 && accessible_vabits4_in_vabits8(a, vabits8)) {
5426 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
5427 &(sm->vabits8[sm_off]) );
5428 return;
5430 PROF_EVENT(MCPE_STOREV16_SLOW3);
5431 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5432 return;
5435 PROF_EVENT(MCPE_STOREV16_SLOW4);
5436 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5438 #endif
5442 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
5444 mc_STOREV16(a, vbits16, True);
5446 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
5448 mc_STOREV16(a, vbits16, False);
5451 /*------------------------------------------------------------*/
5452 /*--- LOADV8 ---*/
5453 /*------------------------------------------------------------*/
5455 /* Note: endianness is irrelevant for size == 1 */
5457 // Non-generic assembly for arm32-linux
5458 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5459 && defined(VGP_arm_linux)
5460 __asm__( /* Derived from NCode template */
5461 ".text \n"
5462 ".align 2 \n"
5463 ".global vgMemCheck_helperc_LOADV8 \n"
5464 ".type vgMemCheck_helperc_LOADV8, %function \n"
5465 "vgMemCheck_helperc_LOADV8: \n" //
5466 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5467 " movw r3, #:lower16:primary_map \n" //
5468 " uxth r1, r0 \n" // r1 = sec-map-offB
5469 " movt r3, #:upper16:primary_map \n" //
5470 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5471 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5472 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5473 " bne .LLV8c0 \n" // no, goto .LLV8c0
5474 ".LLV8h9: \n" //
5475 " mov r0, #0xFFFFFF00 \n" // V_BITS8_DEFINED | top24safe
5476 " bx lr \n" //
5477 ".LLV8c0: \n" //
5478 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5479 " bne .LLV8c4 \n" //
5480 ".LLV8c2: \n" //
5481 " mov r0, #0xFFFFFFFF \n" // V_BITS8_UNDEFINED | top24safe
5482 " bx lr \n" //
5483 ".LLV8c4: \n" //
5484 // r1 holds sec-map-VABITS8
5485 // r0 holds the address. Extract the relevant 2 bits and inspect.
5486 " and r2, r0, #3 \n" // addr & 3
5487 " add r2, r2, r2 \n" // 2 * (addr & 3)
5488 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5489 " and r1, r1, #3 \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5491 " cmp r1, #2 \n" // VA_BITS2_DEFINED
5492 " beq .LLV8h9 \n" //
5494 " cmp r1, #1 \n" // VA_BITS2_UNDEFINED
5495 " beq .LLV8c2 \n" //
5497 " push {r4, lr} \n" //
5498 " mov r2, #0 \n" //
5499 " mov r1, #8 \n" //
5500 " bl mc_LOADVn_slow \n" //
5501 " pop {r4, pc} \n" //
5502 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5503 ".previous\n"
5506 /* Non-generic assembly for x86-linux */
5507 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5508 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5509 __asm__(
5510 ".text\n"
5511 ".align 16\n"
5512 ".global vgMemCheck_helperc_LOADV8\n"
5513 ".type vgMemCheck_helperc_LOADV8, @function\n"
5514 "vgMemCheck_helperc_LOADV8:\n"
5515 " mov %eax, %edx\n"
5516 " shr $0x10, %edx\n"
5517 " mov primary_map(,%edx,4), %ecx\n"
5518 " movzwl %ax, %edx\n"
5519 " shr $0x2, %edx\n"
5520 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5521 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED? */
5522 " jne .LLV8LE2\n" /* jump if not defined */
5523 ".LLV8LE1:\n"
5524 " mov $0xffffff00, %eax\n" /* V_BITS8_DEFINED | top24safe */
5525 " ret\n"
5526 ".LLV8LE2:\n"
5527 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5528 " jne .LLV8LE4\n" /* jump if not all 32bits are undefined */
5529 ".LLV8LE3:\n"
5530 " or $0xffffffff, %eax\n" /* V_BITS8_UNDEFINED | top24safe */
5531 " ret\n"
5532 ".LLV8LE4:\n"
5533 " mov %eax, %ecx\n"
5534 " and $0x3, %ecx\n"
5535 " add %ecx, %ecx\n"
5536 " sar %cl, %edx\n"
5537 " and $0x3, %edx\n"
5538 " cmp $0x2, %edx\n"
5539 " je .LLV8LE1\n" /* jump if all 8bits are defined */
5540 " cmp $0x1, %edx\n"
5541 " je .LLV8LE3\n" /* jump if all 8bits are undefined */
5542 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 8, 0) */
5543 " mov $0x8, %edx\n"
5544 " jmp mc_LOADVn_slow\n"
5545 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5546 ".previous\n"
5549 #else
5550 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5551 VG_REGPARM(1)
5552 UWord MC_(helperc_LOADV8) ( Addr a )
5554 PROF_EVENT(MCPE_LOADV8);
5556 #ifndef PERF_FAST_LOADV
5557 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5558 #else
5560 UWord sm_off, vabits8;
5561 SecMap* sm;
5563 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5564 PROF_EVENT(MCPE_LOADV8_SLOW1);
5565 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5568 sm = get_secmap_for_reading_low(a);
5569 sm_off = SM_OFF(a);
5570 vabits8 = sm->vabits8[sm_off];
5571 // Convert V bits from compact memory form to expanded register form
5572 // Handle common case quickly: a is mapped, and the entire
5573 // word32 it lives in is addressable.
5574 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; }
5575 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
5576 else {
5577 // The 4 (yes, 4) bytes are not all-defined or all-undefined; check
5578 // the single byte.
5579 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
5580 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; }
5581 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
5582 else {
5583 /* Slow case: the byte is not all-defined or all-undefined. */
5584 PROF_EVENT(MCPE_LOADV8_SLOW2);
5585 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5589 #endif
5591 #endif
5593 /*------------------------------------------------------------*/
5594 /*--- STOREV8 ---*/
5595 /*------------------------------------------------------------*/
5597 VG_REGPARM(2)
5598 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
5600 PROF_EVENT(MCPE_STOREV8);
5602 #ifndef PERF_FAST_STOREV
5603 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5604 #else
5606 UWord sm_off, vabits8;
5607 SecMap* sm;
5609 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5610 PROF_EVENT(MCPE_STOREV8_SLOW1);
5611 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5612 return;
5615 sm = get_secmap_for_reading_low(a);
5616 sm_off = SM_OFF(a);
5617 vabits8 = sm->vabits8[sm_off];
5619 // Clevernesses to speed up storing V bits.
4620 // The 64/32/16 bit cases also have similar clevernesses, but they
4621 // work a little differently from the code below.
5623 // Cleverness 1: sometimes we don't have to write the shadow memory at
5624 // all, if we can tell that what we want to write is the same as what is
5625 // already there. These cases are marked below as "defined on defined" and
5626 // "undefined on undefined".
5628 // Cleverness 2:
4629 // We also avoid calling mc_STOREVn_slow if the V bits can be written
4630 // directly into the secondary map. V bits can be written directly
4631 // if four conditions hold:
5632 // * The address for which V bits are written is naturally aligned
5633 // on 1 byte for STOREV8 (this is always true)
5634 // on 2 bytes for STOREV16
5635 // on 4 bytes for STOREV32
5636 // on 8 bytes for STOREV64.
4637 // * The V bits being written are either fully defined or fully undefined
4638 // (partially defined V bits cannot be written directly, as the
4639 // secondary vbits table must be maintained).
5640 // * the secmap is not distinguished (distinguished maps cannot be
5641 // modified).
5642 // * the memory corresponding to the V bits being written is
5643 // accessible (if one or more bytes are not accessible,
5644 // we must call mc_STOREVn_slow in order to report accessibility
5645 // errors).
5646 // Note that for STOREV32 and STOREV64, it is too expensive
5647 // to verify the accessibility of each byte for the benefit it
5648 // brings. Instead, a quicker check is done by comparing to
5649 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
4650 // but misses some opportunities for direct modification.
4651 // Checking the accessibility of each byte was measured with
4652 // STOREV32 on the perf tests and slowed them all down.
5653 // The cases corresponding to cleverness 2 are marked below as
5654 // "direct mod".
5655 if (LIKELY(V_BITS8_DEFINED == vbits8)) {
5656 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5657 return; // defined on defined
5659 if (!is_distinguished_sm(sm)
5660 && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
5661 // direct mod
5662 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
5663 &(sm->vabits8[sm_off]) );
5664 return;
5666 PROF_EVENT(MCPE_STOREV8_SLOW2);
5667 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5668 return;
5670 if (V_BITS8_UNDEFINED == vbits8) {
5671 if (vabits8 == VA_BITS8_UNDEFINED) {
5672 return; // undefined on undefined
5674 if (!is_distinguished_sm(sm)
5675 && (VA_BITS2_NOACCESS
5676 != extract_vabits2_from_vabits8(a, vabits8))) {
5677 // direct mod
5678 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
5679 &(sm->vabits8[sm_off]) );
5680 return;
5682 PROF_EVENT(MCPE_STOREV8_SLOW3);
5683 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5684 return;
5687 // Partially defined word
5688 PROF_EVENT(MCPE_STOREV8_SLOW4);
5689 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5691 #endif
5695 /*------------------------------------------------------------*/
5696 /*--- Functions called directly from generated code: ---*/
5697 /*--- Value-check failure handlers. ---*/
5698 /*------------------------------------------------------------*/
5700 /* Call these ones when an origin is available ... */
5701 VG_REGPARM(1)
5702 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
5703 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
5706 VG_REGPARM(1)
5707 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
5708 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
5711 VG_REGPARM(1)
5712 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
5713 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
5716 VG_REGPARM(1)
5717 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
5718 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
5721 VG_REGPARM(2)
5722 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
5723 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
5726 /* ... and these when an origin isn't available. */
5728 VG_REGPARM(0)
5729 void MC_(helperc_value_check0_fail_no_o) ( void ) {
5730 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
5733 VG_REGPARM(0)
5734 void MC_(helperc_value_check1_fail_no_o) ( void ) {
5735 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
5738 VG_REGPARM(0)
5739 void MC_(helperc_value_check4_fail_no_o) ( void ) {
5740 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
5743 VG_REGPARM(0)
5744 void MC_(helperc_value_check8_fail_no_o) ( void ) {
5745 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
5748 VG_REGPARM(1)
5749 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
5750 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
5754 /*------------------------------------------------------------*/
5755 /*--- Metadata get/set functions, for client requests. ---*/
5756 /*------------------------------------------------------------*/
5758 // Nb: this expands the V+A bits out into register-form V bits, even though
5759 // they're in memory. This is for backward compatibility, and because it's
5760 // probably what the user wants.
5762 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5763 error [no longer used], 3 == addressing error. */
5764 /* Nb: We used to issue various definedness/addressability errors from here,
5765 but we took them out because they ranged from not-very-helpful to
5766 downright annoying, and they complicated the error data structures. */
5767 static Int mc_get_or_set_vbits_for_client (
5768 Addr a,
5769 Addr vbits,
5770 SizeT szB,
5771 Bool setting, /* True <=> set vbits, False <=> get vbits */
5772 Bool is_client_request /* True <=> real user request
5773 False <=> internal call from gdbserver */
5776 SizeT i;
5777 Bool ok;
5778 UChar vbits8;
5780 /* Check that the arrays are addressable before doing any getting/setting.
5781 vbits is checked only for a real user request. */
5782 for (i = 0; i < szB; i++) {
5783 if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
5784 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
5785 return 3;
5789 /* Do the copy */
5790 if (setting) {
5791 /* setting */
5792 for (i = 0; i < szB; i++) {
5793 ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
5794 tl_assert(ok);
5796 } else {
5797 /* getting */
5798 for (i = 0; i < szB; i++) {
5799 ok = get_vbits8(a + i, &vbits8);
5800 tl_assert(ok);
5801 ((UChar*)vbits)[i] = vbits8;
5803 if (is_client_request)
5804 // The bytes in vbits[] have now been set, so mark them as such.
5805 MC_(make_mem_defined)(vbits, szB);
5808 return 1;
5812 /*------------------------------------------------------------*/
5813 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
5814 /*------------------------------------------------------------*/
5816 /* For the memory leak detector, say whether an entire 64k chunk of
5817 address space is possibly in use, or not. If in doubt return
5818 True.
5820 Bool MC_(is_within_valid_secondary) ( Addr a )
5822 SecMap* sm = maybe_get_secmap_for ( a );
5823 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
5824 /* Definitely not in use. */
5825 return False;
5826 } else {
5827 return True;
5832 /* For the memory leak detector, say whether or not a given word
5833 address is to be regarded as valid. */
5834 Bool MC_(is_valid_aligned_word) ( Addr a )
5836 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5837 tl_assert(VG_IS_WORD_ALIGNED(a));
5838 if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5839 return False;
5840 if (sizeof(UWord) == 8) {
5841 if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5842 return False;
5844 if (UNLIKELY(MC_(in_ignored_range)(a)))
5845 return False;
5846 else
5847 return True;
5851 /*------------------------------------------------------------*/
5852 /*--- Initialisation ---*/
5853 /*------------------------------------------------------------*/
5855 static void init_shadow_memory ( void )
5857 Int i;
5858 SecMap* sm;
5860 tl_assert(V_BIT_UNDEFINED == 1);
5861 tl_assert(V_BIT_DEFINED == 0);
5862 tl_assert(V_BITS8_UNDEFINED == 0xFF);
5863 tl_assert(V_BITS8_DEFINED == 0);
5865 /* Build the 3 distinguished secondaries */
5866 sm = &sm_distinguished[SM_DIST_NOACCESS];
5867 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5869 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5870 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5872 sm = &sm_distinguished[SM_DIST_DEFINED];
5873 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5875 /* Set up the primary map. */
5876 /* These entries gradually get overwritten as the used address
5877 space expands. */
5878 for (i = 0; i < N_PRIMARY_MAP; i++)
5879 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5881 /* Auxiliary primary maps */
5882 init_auxmap_L1_L2();
5884 /* auxmap_size = auxmap_used = 0;
5885 no ... these are statically initialised */
5887 /* Secondary V bit table */
5888 secVBitTable = createSecVBitTable();
5892 /*------------------------------------------------------------*/
5893 /*--- Sanity check machinery (permanently engaged) ---*/
5894 /*------------------------------------------------------------*/
5896 static Bool mc_cheap_sanity_check ( void )
5898 n_sanity_cheap++;
5899 PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
5900 /* Check for sane operating level */
5901 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5902 return False;
5903 /* nothing else useful we can rapidly check */
5904 return True;
5907 static Bool mc_expensive_sanity_check ( void )
5909 Int i;
5910 Word n_secmaps_found;
5911 SecMap* sm;
5912 const HChar* errmsg;
5913 Bool bad = False;
5915 if (0) VG_(printf)("expensive sanity check\n");
5916 if (0) return True;
5918 n_sanity_expensive++;
5919 PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);
5921 /* Check for sane operating level */
5922 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5923 return False;
5925 /* Check that the 3 distinguished SMs are still as they should be. */
5927 /* Check noaccess DSM. */
5928 sm = &sm_distinguished[SM_DIST_NOACCESS];
5929 for (i = 0; i < SM_CHUNKS; i++)
5930 if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5931 bad = True;
5933 /* Check undefined DSM. */
5934 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5935 for (i = 0; i < SM_CHUNKS; i++)
5936 if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5937 bad = True;
5939 /* Check defined DSM. */
5940 sm = &sm_distinguished[SM_DIST_DEFINED];
5941 for (i = 0; i < SM_CHUNKS; i++)
5942 if (sm->vabits8[i] != VA_BITS8_DEFINED)
5943 bad = True;
5945 if (bad) {
5946 VG_(printf)("memcheck expensive sanity: "
5947 "distinguished_secondaries have changed\n");
5948 return False;
5951 /* If we're not checking for undefined value errors, the secondary V bit
5952 * table should be empty. */
5953 if (MC_(clo_mc_level) == 1) {
5954 if (0 != VG_(OSetGen_Size)(secVBitTable))
5955 return False;
5958 /* check the auxiliary maps, very thoroughly */
5959 n_secmaps_found = 0;
5960 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
5961 if (errmsg) {
5962 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
5963 return False;
5966 /* n_secmaps_found is now the number referred to by the auxiliary
5967 primary map. Now add on the ones referred to by the main
5968 primary map. */
5969 for (i = 0; i < N_PRIMARY_MAP; i++) {
5970 if (primary_map[i] == NULL) {
5971 bad = True;
5972 } else {
5973 if (!is_distinguished_sm(primary_map[i]))
5974 n_secmaps_found++;
5978 /* check that the number of secmaps issued matches the number that
5979 are reachable (iow, no secmap leaks) */
5980 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
5981 bad = True;
5983 if (bad) {
5984 VG_(printf)("memcheck expensive sanity: "
5985 "apparent secmap leakage\n");
5986 return False;
5989 if (bad) {
5990 VG_(printf)("memcheck expensive sanity: "
5991 "auxmap covers wrong address space\n");
5992 return False;
5995 /* there is only one pointer to each secmap (expensive) */
5997 return True;
6000 /*------------------------------------------------------------*/
6001 /*--- Command line args ---*/
6002 /*------------------------------------------------------------*/
6004 /* 31 Aug 2015: Vectorised code is now so widespread that
6005 --partial-loads-ok needs to be enabled by default on all platforms.
6006 Not doing so causes lots of false errors. */
6007 Bool MC_(clo_partial_loads_ok) = True;
6008 Long MC_(clo_freelist_vol) = 20*1000*1000LL;
6009 Long MC_(clo_freelist_big_blocks) = 1*1000*1000LL;
6010 LeakCheckMode MC_(clo_leak_check) = LC_Summary;
6011 VgRes MC_(clo_leak_resolution) = Vg_HighRes;
6012 UInt MC_(clo_show_leak_kinds) = R2S(Possible) | R2S(Unreached);
6013 UInt MC_(clo_error_for_leak_kinds) = R2S(Possible) | R2S(Unreached);
6014 UInt MC_(clo_leak_check_heuristics) = H2S(LchStdString)
6015 | H2S( LchLength64)
6016 | H2S( LchNewArray)
6017 | H2S( LchMultipleInheritance);
6018 Bool MC_(clo_xtree_leak) = False;
6019 const HChar* MC_(clo_xtree_leak_file) = "xtleak.kcg.%p";
6020 Bool MC_(clo_workaround_gcc296_bugs) = False;
6021 Int MC_(clo_malloc_fill) = -1;
6022 Int MC_(clo_free_fill) = -1;
6023 KeepStacktraces MC_(clo_keep_stacktraces) = KS_alloc_and_free;
6024 Int MC_(clo_mc_level) = 2;
6025 Bool MC_(clo_show_mismatched_frees) = True;
6027 ExpensiveDefinednessChecks
6028 MC_(clo_expensive_definedness_checks) = EdcAUTO;
6030 Bool MC_(clo_ignore_range_below_sp) = False;
6031 UInt MC_(clo_ignore_range_below_sp__first_offset) = 0;
6032 UInt MC_(clo_ignore_range_below_sp__last_offset) = 0;
6034 static const HChar * MC_(parse_leak_heuristics_tokens) =
6035 "-,stdstring,length64,newarray,multipleinheritance";
6036 /* The first heuristic value (LchNone) has no keyword, as this is
6037 a fake heuristic used to collect the blocks found without any
6038 heuristic. */
6040 static Bool mc_process_cmd_line_options(const HChar* arg)
6042 const HChar* tmp_str;
6043 Int tmp_show;
6045 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6047 /* Set MC_(clo_mc_level):
6048 1 = A bit tracking only
6049 2 = A and V bit tracking, but no V bit origins
6050 3 = A and V bit tracking, and V bit origins
6052 Do this by inspecting --undef-value-errors= and
6053 --track-origins=. Reject the case --undef-value-errors=no
6054 --track-origins=yes as meaningless.
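Illustrative summary of the resulting level:
--undef-value-errors=no -> level 1
(defaults, i.e. --undef-value-errors=yes) -> level 2
--track-origins=yes -> level 3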
6056 if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
6057 if (MC_(clo_mc_level) == 3) {
6058 goto bad_level;
6059 } else {
6060 MC_(clo_mc_level) = 1;
6061 return True;
6064 if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
6065 if (MC_(clo_mc_level) == 1)
6066 MC_(clo_mc_level) = 2;
6067 return True;
6069 if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
6070 if (MC_(clo_mc_level) == 3)
6071 MC_(clo_mc_level) = 2;
6072 return True;
6074 if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
6075 if (MC_(clo_mc_level) == 1) {
6076 goto bad_level;
6077 } else {
6078 MC_(clo_mc_level) = 3;
6079 return True;
6083 if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
6084 else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
6085 MC_(parse_leak_kinds_tokens),
6086 MC_(clo_error_for_leak_kinds)) {}
6087 else if VG_USET_CLO(arg, "--show-leak-kinds",
6088 MC_(parse_leak_kinds_tokens),
6089 MC_(clo_show_leak_kinds)) {}
6090 else if VG_USET_CLO(arg, "--leak-check-heuristics",
6091 MC_(parse_leak_heuristics_tokens),
6092 MC_(clo_leak_check_heuristics)) {}
6093 else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
6094 if (tmp_show) {
6095 MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
6096 } else {
6097 MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
6100 else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
6101 if (tmp_show) {
6102 MC_(clo_show_leak_kinds) |= R2S(Possible);
6103 } else {
6104 MC_(clo_show_leak_kinds) &= ~R2S(Possible);
6107 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
6108 MC_(clo_workaround_gcc296_bugs)) {}
6110 else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol),
6111 0, 10*1000*1000*1000LL) {}
6113 else if VG_BINT_CLO(arg, "--freelist-big-blocks",
6114 MC_(clo_freelist_big_blocks),
6115 0, 10*1000*1000*1000LL) {}
6117 else if VG_XACT_CLO(arg, "--leak-check=no",
6118 MC_(clo_leak_check), LC_Off) {}
6119 else if VG_XACT_CLO(arg, "--leak-check=summary",
6120 MC_(clo_leak_check), LC_Summary) {}
6121 else if VG_XACT_CLO(arg, "--leak-check=yes",
6122 MC_(clo_leak_check), LC_Full) {}
6123 else if VG_XACT_CLO(arg, "--leak-check=full",
6124 MC_(clo_leak_check), LC_Full) {}
6126 else if VG_XACT_CLO(arg, "--leak-resolution=low",
6127 MC_(clo_leak_resolution), Vg_LowRes) {}
6128 else if VG_XACT_CLO(arg, "--leak-resolution=med",
6129 MC_(clo_leak_resolution), Vg_MedRes) {}
6130 else if VG_XACT_CLO(arg, "--leak-resolution=high",
6131 MC_(clo_leak_resolution), Vg_HighRes) {}
6133 else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
6134 Bool ok = parse_ignore_ranges(tmp_str);
6135 if (!ok) {
6136 VG_(message)(Vg_DebugMsg,
6137 "ERROR: --ignore-ranges: "
6138 "invalid syntax, or end <= start in range\n");
6139 return False;
6141 if (gIgnoredAddressRanges) {
6142 UInt i;
6143 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
6144 UWord val = IAR_INVALID;
6145 UWord key_min = ~(UWord)0;
6146 UWord key_max = (UWord)0;
6147 VG_(indexRangeMap)( &key_min, &key_max, &val,
6148 gIgnoredAddressRanges, i );
6149 tl_assert(key_min <= key_max);
6150 UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
6151 if (key_max - key_min > limit && val == IAR_CommandLine) {
6152 VG_(message)(Vg_DebugMsg,
6153 "ERROR: --ignore-ranges: suspiciously large range:\n");
6154 VG_(message)(Vg_DebugMsg,
6155 " 0x%lx-0x%lx (size %lu)\n", key_min, key_max,
6156 key_max - key_min + 1);
6157 return False;
6163 else if VG_STR_CLO(arg, "--ignore-range-below-sp", tmp_str) {
6164 /* This seems at first a bit weird, but: in order to imply
6165 a non-wrapped-around address range, the first offset needs to be
6166 larger than the second one. For example
6167 --ignore-range-below-sp=8192,8189
6168 would cause accesses in the range [SP-8192, SP-8189] to be
6169 ignored. */
6170 UInt offs1 = 0, offs2 = 0;
6171 Bool ok = parse_UInt_pair(&tmp_str, &offs1, &offs2);
6172 // Ensure we used all the text after the '=' sign.
6173 if (ok && *tmp_str != 0) ok = False;
6174 if (!ok) {
6175 VG_(message)(Vg_DebugMsg,
6176 "ERROR: --ignore-range-below-sp: invalid syntax. "
6177 " Expected \"...=decimalnumber-decimalnumber\".\n");
6178 return False;
6180 if (offs1 > 1000*1000 /*arbitrary*/ || offs2 > 1000*1000 /*ditto*/) {
6181 VG_(message)(Vg_DebugMsg,
6182 "ERROR: --ignore-range-below-sp: suspiciously large "
6183 "offset(s): %u and %u\n", offs1, offs2);
6184 return False;
6186 if (offs1 <= offs2) {
6187 VG_(message)(Vg_DebugMsg,
6188 "ERROR: --ignore-range-below-sp: invalid offsets "
6189 "(the first must be larger): %u and %u\n", offs1, offs2);
6190 return False;
6192 tl_assert(offs1 > offs2);
6193 if (offs1 - offs2 > 4096 /*arbitrary*/) {
6194 VG_(message)(Vg_DebugMsg,
6195 "ERROR: --ignore-range-below-sp: suspiciously large "
6196 "range: %u-%u (size %u)\n", offs1, offs2, offs1 - offs2);
6197 return False;
6199 MC_(clo_ignore_range_below_sp) = True;
6200 MC_(clo_ignore_range_below_sp__first_offset) = offs1;
6201 MC_(clo_ignore_range_below_sp__last_offset) = offs2;
6202 return True;
6205 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
6206 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}
6208 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
6209 MC_(clo_keep_stacktraces), KS_alloc) {}
6210 else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
6211 MC_(clo_keep_stacktraces), KS_free) {}
6212 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
6213 MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
6214 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
6215 MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
6216 else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
6217 MC_(clo_keep_stacktraces), KS_none) {}
6219 else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
6220 MC_(clo_show_mismatched_frees)) {}
6222 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=no",
6223 MC_(clo_expensive_definedness_checks), EdcNO) {}
6224 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=auto",
6225 MC_(clo_expensive_definedness_checks), EdcAUTO) {}
6226 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=yes",
6227 MC_(clo_expensive_definedness_checks), EdcYES) {}
6229 else if VG_BOOL_CLO(arg, "--xtree-leak",
6230 MC_(clo_xtree_leak)) {}
6231 else if VG_STR_CLO (arg, "--xtree-leak-file",
6232 MC_(clo_xtree_leak_file)) {}
6234 else
6235 return VG_(replacement_malloc_process_cmd_line_option)(arg);
6237 return True;
6240 bad_level:
6241 VG_(fmsg_bad_option)(arg,
6242 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
6245 static void mc_print_usage(void)
6247 VG_(printf)(
6248 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
6249 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
6250 " --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
6251 " [definite,possible]\n"
6252 " --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
6253 " [definite,possible]\n"
6254 " where kind is one of:\n"
6255 " definite indirect possible reachable all none\n"
6256 " --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
6257 " improving leak search false positive [all]\n"
6258 " where heur is one of:\n"
6259 " stdstring length64 newarray multipleinheritance all none\n"
6260 " --show-reachable=yes same as --show-leak-kinds=all\n"
6261 " --show-reachable=no --show-possibly-lost=yes\n"
6262 " same as --show-leak-kinds=definite,possible\n"
6263 " --show-reachable=no --show-possibly-lost=no\n"
6264 " same as --show-leak-kinds=definite\n"
6265 " --xtree-leak=no|yes output leak result in xtree format? [no]\n"
6266 " --xtree-leak-file=<file> xtree leak report file [xtleak.kcg.%%p]\n"
6267 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
6268 " --track-origins=no|yes show origins of undefined values? [no]\n"
6269 " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
6270 " --expensive-definedness-checks=no|auto|yes\n"
6271 " Use extra-precise definedness tracking [auto]\n"
6272 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
6273 " --freelist-big-blocks=<number> releases first blocks with size>= [1000000]\n"
6274 " --workaround-gcc296-bugs=no|yes self explanatory [no]. Deprecated.\n"
6275 " Use --ignore-range-below-sp instead.\n"
6276 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
6277 " --ignore-range-below-sp=<number>-<number> do not report errors for\n"
6278 " accesses at the given offsets below SP\n"
6279 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
6280 " --free-fill=<hexnumber> fill free'd areas with given value\n"
6281 " --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
6282 " stack trace(s) to keep for malloc'd/free'd areas [alloc-and-free]\n"
6283 " --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
6287 static void mc_print_debug_usage(void)
6289 VG_(printf)(
6290 " (none)\n"
6295 /*------------------------------------------------------------*/
6296 /*--- Client blocks ---*/
6297 /*------------------------------------------------------------*/
6299 /* Client block management:
6301 This is managed as an expanding array of client block descriptors.
6302 Indices of live descriptors are issued to the client, so it can ask
6303 to free them later. Therefore we cannot slide live entries down
6304 over dead ones. Instead we must use free/inuse flags and scan for
6305 an empty slot at allocation time. This in turn means allocation is
6306 relatively expensive, so we hope this does not happen too often.
6308 An unused block has start == size == 0
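For instance (illustrative): alloc_client_block() below first scans
for a slot with start == size == 0 and reuses it; only if none is
free does it grow the array (doubling its size), so indices already
issued to the client remain valid.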
6311 /* type CGenBlock is defined in mc_include.h */
6313 /* This subsystem is self-initialising. */
6314 static UWord cgb_size = 0;
6315 static UWord cgb_used = 0;
6316 static CGenBlock* cgbs = NULL;
6318 /* Stats for this subsystem. */
6319 static ULong cgb_used_MAX = 0; /* Max in use. */
6320 static ULong cgb_allocs = 0; /* Number of allocs. */
6321 static ULong cgb_discards = 0; /* Number of discards. */
6322 static ULong cgb_search = 0; /* Number of searches. */
6325 /* Get access to the client block array. */
6326 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
6327 /*OUT*/UWord* nBlocks )
6329 *blocks = cgbs;
6330 *nBlocks = cgb_used;
6334 static
6335 Int alloc_client_block ( void )
6337 UWord i, sz_new;
6338 CGenBlock* cgbs_new;
6340 cgb_allocs++;
6342 for (i = 0; i < cgb_used; i++) {
6343 cgb_search++;
6344 if (cgbs[i].start == 0 && cgbs[i].size == 0)
6345 return i;
6348 /* Not found. Try to allocate one at the end. */
6349 if (cgb_used < cgb_size) {
6350 cgb_used++;
6351 return cgb_used-1;
6354 /* Ok, we have to allocate a new one. */
6355 tl_assert(cgb_used == cgb_size);
6356 sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
6358 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
6359 for (i = 0; i < cgb_used; i++)
6360 cgbs_new[i] = cgbs[i];
6362 if (cgbs != NULL)
6363 VG_(free)( cgbs );
6364 cgbs = cgbs_new;
6366 cgb_size = sz_new;
6367 cgb_used++;
6368 if (cgb_used > cgb_used_MAX)
6369 cgb_used_MAX = cgb_used;
6370 return cgb_used-1;
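/* Editor's note: with the doubling above (capacities 10, 20, 40, ...),
   the number of reallocations of the descriptor array stays logarithmic
   in the peak number of live client blocks. */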
6374 static void show_client_block_stats ( void )
6376 VG_(message)(Vg_DebugMsg,
6377 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6378 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
6381 static void print_monitor_help ( void )
6383 VG_(gdb_printf)
6385 "\n"
6386 "memcheck monitor commands:\n"
6387 " xb <addr> [<len>]\n"
6388 " prints validity bits for <len> (or 1) bytes at <addr>\n"
6389 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6390 " Then prints the bytes values below the corresponding validity bits\n"
6391 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6392 " Example: xb 0x8049c78 10\n"
6393 " get_vbits <addr> [<len>]\n"
6394 " Similar to xb, but only prints the validity bytes by group of 4.\n"
6395 " make_memory [noaccess|undefined\n"
6396 " |defined|Definedifaddressable] <addr> [<len>]\n"
6397 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6398 " check_memory [addressable|defined] <addr> [<len>]\n"
6399 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6400 " and outputs a description of <addr>\n"
6401 " leak_check [full*|summary|xtleak]\n"
6402 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6403 " [heuristics heur1,heur2,...]\n"
6404 " [increased*|changed|any]\n"
6405 " [unlimited*|limited <max_loss_records_output>]\n"
6406 " * = defaults\n"
6407 " xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
6408 " where kind is one of:\n"
6409 " definite indirect possible reachable all none\n"
6410 " where heur is one of:\n"
6411 " stdstring length64 newarray multipleinheritance all none*\n"
6412 " Examples: leak_check\n"
6413 " leak_check summary any\n"
6414 " leak_check full kinds indirect,possible\n"
6415 " leak_check full reachable any limited 100\n"
6416 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6417 " [unlimited*|limited <max_blocks>]\n"
6418 " [heuristics heur1,heur2,...]\n"
6419 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6420 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6421 " With heuristics, only shows the blocks found via heur1,heur2,...\n"
6422 " * = defaults\n"
6423 " who_points_at <addr> [<len>]\n"
6424 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6425 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6426 " with len > 1, will also show \"interior pointers\")\n"
6427 " xtmemory [<filename>]\n"
6428 " dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
6429 "\n");
6432 /* Print szB bytes at address, with a format similar to the gdb command
6433 x /<szB>xb address.
6434 res[i] == 1 indicates the corresponding byte is addressable. */
6435 static void gdb_xb (Addr address, SizeT szB, Int res[])
6437 UInt i;
6439 for (i = 0; i < szB; i++) {
6440 UInt bnr = i % 8;
6441 if (bnr == 0) {
6442 if (i != 0)
6443 VG_(printf) ("\n"); // Terminate previous line
6444 VG_(printf) ("%p:", (void*)(address+i));
6446 if (res[i] == 1)
6447 VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
6448 else
6449 VG_(printf) ("\t0x??");
6451 VG_(printf) ("\n"); // Terminate previous line
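/* Editor's illustration (hypothetical address and contents): for
   "xb 0x1000 4" the xb handler further below prints one row of V bits
   and then calls gdb_xb for the same row, giving output roughly like

            00      00      ff      __
      0x1000:   0x61    0x62    0x63    0x??

   where ff marks a fully undefined byte and __ / 0x?? an unaddressable
   one.  Exact column spacing comes from the tab layout above. */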
6455 /* Returns the address of the next non-space character,
6456    or the address of the string terminator. */
6457 static HChar* next_non_space (HChar *s)
6459 while (*s && *s == ' ')
6460 s++;
6461 return s;
6464 /* Parse an integer slice, i.e. a single integer or a range of integers.
6465 Syntax is:
6466 <integer>[..<integer> ]
6467 (spaces are allowed before and/or after ..).
6468 Return True if range correctly parsed, False otherwise. */
6469 static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
6470 UInt *from, UInt *to)
6472 HChar* wl;
6473 HChar *endptr;
6474 endptr = NULL;
6475 wl = VG_(strtok_r) (s, " ", saveptr);
6477 /* slice must start with an integer. */
6478 if (wl == NULL) {
6479 VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
6480 return False;
6482 *from = VG_(strtoull10) (wl, &endptr);
6483 if (endptr == wl) {
6484 VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
6485 return False;
6488 if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
6489 /* wl token is an integer terminating the string
6490 or else next token does not start with .
6491 In both cases, the slice is a single integer. */
6492 *to = *from;
6493 return True;
6496 if (*endptr == '\0') {
6497 // iii .. => get the next token
6498 wl = VG_(strtok_r) (NULL, " .", saveptr);
6499 } else {
6500 // It must be iii..
6501 if (*endptr != '.' || *(endptr+1) != '.') {
6502 VG_(gdb_printf) ("expecting slice <from>..<to>\n");
6503 return False;
6505 if ( *(endptr+2) == ' ') {
6506 // It must be iii.. jjj => get the next token
6507 wl = VG_(strtok_r) (NULL, " .", saveptr);
6508 } else {
6509 // It must be iii..jjj
6510 wl = endptr+2;
6514 *to = VG_(strtoull10) (wl, &endptr);
6515 if (*endptr != '\0') {
6516 VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
6517 return False;
6520 if (*from > *to) {
6521 VG_(gdb_printf) ("<from> cannot be bigger than <to> "
6522 "in slice <from>..<to>\n");
6523 return False;
6526 return True;
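/* Editor's illustration (hypothetical values): inputs this parser accepts.
      "5"          from = 5, to = 5
      "5..10"      from = 5, to = 10
      "5.. 10"     from = 5, to = 10
      "5 .. 10"    from = 5, to = 10
   Malformed slices (e.g. "5.10") and reversed ones (e.g. "10..5") are
   rejected with a message. */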
6529 /* return True if request recognised, False otherwise */
6530 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
6532 HChar* wcmd;
6533 HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
6534 HChar *ssaveptr;
6536 VG_(strcpy) (s, req);
6538 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
6539 /* NB: if possible, avoid introducing a new command below which
6540 starts with the same first letter(s) as an already existing
6541 command. This ensures a shorter abbreviation for the user. */
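/* Editor's note (illustrative): with the set below, "l" already uniquely
   selects leak_check, while "x" is ambiguous between xb and xtmemory, so
   the shortest usable forms there are "xb" and "xt". */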
6542 switch (VG_(keyword_id)
6543 ("help get_vbits leak_check make_memory check_memory "
6544 "block_list who_points_at xb xtmemory",
6545 wcmd, kwd_report_duplicated_matches)) {
6546 case -2: /* multiple matches */
6547 return True;
6548 case -1: /* not found */
6549 return False;
6550 case 0: /* help */
6551 print_monitor_help();
6552 return True;
6553 case 1: { /* get_vbits */
6554 Addr address;
6555 SizeT szB = 1;
6556 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6557 UChar vbits;
6558 Int i;
6559 Int unaddressable = 0;
6560 for (i = 0; i < szB; i++) {
6561 Int res = mc_get_or_set_vbits_for_client
6562 (address+i, (Addr) &vbits, 1,
6563 False, /* get them */
6564 False /* is client request */ );
6565 /* We are about to print the first character of the next line, so print a \n first. */
6566 if ((i % 32) == 0 && i != 0)
6567 VG_(printf) ("\n");
6568 /* We are about to start the next block of 4, so print a space first. */
6569 else if ((i % 4) == 0 && i != 0)
6570 VG_(printf) (" ");
6571 if (res == 1) {
6572 VG_(printf) ("%02x", vbits);
6573 } else {
6574 tl_assert(3 == res);
6575 unaddressable++;
6576 VG_(printf) ("__");
6579 VG_(printf) ("\n");
6580 if (unaddressable) {
6581 VG_(printf)
6582 ("Address %p len %lu has %d bytes unaddressable\n",
6583 (void *)address, szB, unaddressable);
6586 return True;
6588 case 2: { /* leak_check */
6589 Int err = 0;
6590 LeakCheckParams lcp;
6591 HChar* xt_filename = NULL;
6592 HChar* kw;
6594 lcp.mode = LC_Full;
6595 lcp.show_leak_kinds = R2S(Possible) | R2S(Unreached);
6596 lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
6597 lcp.heuristics = 0;
6598 lcp.deltamode = LCD_Increased;
6599 lcp.max_loss_records_output = 999999999;
6600 lcp.requested_by_monitor_command = True;
6601 lcp.xt_filename = NULL;
6603 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
6604 kw != NULL;
6605 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6606 switch (VG_(keyword_id)
6607 ("full summary xtleak "
6608 "kinds reachable possibleleak definiteleak "
6609 "heuristics "
6610 "increased changed any "
6611 "unlimited limited ",
6612 kw, kwd_report_all)) {
6613 case -2: err++; break;
6614 case -1: err++; break;
6615 case 0: /* full */
6616 lcp.mode = LC_Full; break;
6617 case 1: /* summary */
6618 lcp.mode = LC_Summary; break;
6619 case 2: /* xtleak */
6620 lcp.mode = LC_Full;
6621 xt_filename
6622 = VG_(expand_file_name)("--xtleak-mc_main.c",
6623 "xtleak.kcg.%p.%n");
6624 lcp.xt_filename = xt_filename;
6625 break;
6626 case 3: { /* kinds */
6627 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6628 if (wcmd == NULL
6629 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
6630 True/*allow_all*/,
6631 wcmd,
6632 &lcp.show_leak_kinds)) {
6633 VG_(gdb_printf) ("missing or malformed leak kinds set\n");
6634 err++;
6636 break;
6638 case 4: /* reachable */
6639 lcp.show_leak_kinds = MC_(all_Reachedness)();
6640 break;
6641 case 5: /* possibleleak */
6642 lcp.show_leak_kinds
6643 = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
6644 break;
6645 case 6: /* definiteleak */
6646 lcp.show_leak_kinds = R2S(Unreached);
6647 break;
6648 case 7: { /* heuristics */
6649 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6650 if (wcmd == NULL
6651 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6652 True,/*allow_all*/
6653 wcmd,
6654 &lcp.heuristics)) {
6655 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6656 err++;
6658 break;
6660 case 8: /* increased */
6661 lcp.deltamode = LCD_Increased; break;
6662 case 9: /* changed */
6663 lcp.deltamode = LCD_Changed; break;
6664 case 10: /* any */
6665 lcp.deltamode = LCD_Any; break;
6666 case 11: /* unlimited */
6667 lcp.max_loss_records_output = 999999999; break;
6668 case 12: { /* limited */
6669 Int int_value;
6670 const HChar* endptr;
6672 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6673 if (wcmd == NULL) {
6674 int_value = 0;
6675 endptr = "empty"; /* to report an error below */
6676 } else {
6677 HChar *the_end;
6678 int_value = VG_(strtoll10) (wcmd, &the_end);
6679 endptr = the_end;
6681 if (*endptr != '\0')
6682 VG_(gdb_printf) ("missing or malformed integer value\n");
6683 else if (int_value > 0)
6684 lcp.max_loss_records_output = (UInt) int_value;
6685 else
6686 VG_(gdb_printf) ("max_loss_records_output must be >= 1,"
6687 " got %d\n", int_value);
6688 break;
6690 default:
6691 tl_assert (0);
6694 if (!err)
6695 MC_(detect_memory_leaks)(tid, &lcp);
6696 if (xt_filename != NULL)
6697 VG_(free)(xt_filename);
6698 return True;
6701 case 3: { /* make_memory */
6702 Addr address;
6703 SizeT szB = 1;
6704 Int kwdid = VG_(keyword_id)
6705 ("noaccess undefined defined Definedifaddressable",
6706 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6707 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6708 return True;
6709 switch (kwdid) {
6710 case -2: break;
6711 case -1: break;
6712 case 0: MC_(make_mem_noaccess) (address, szB); break;
6713 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
6714 MC_OKIND_USER ); break;
6715 case 2: MC_(make_mem_defined) ( address, szB ); break;
6716 case 3: make_mem_defined_if_addressable ( address, szB ); break;
6717 default: tl_assert(0);
6719 return True;
6722 case 4: { /* check_memory */
6723 Addr address;
6724 SizeT szB = 1;
6725 Addr bad_addr;
6726 UInt okind;
6727 const HChar* src;
6728 UInt otag;
6729 UInt ecu;
6730 ExeContext* origin_ec;
6731 MC_ReadResult res;
6733 Int kwdid = VG_(keyword_id)
6734 ("addressable defined",
6735 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6736 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6737 return True;
6738 switch (kwdid) {
6739 case -2: break;
6740 case -1: break;
6741 case 0: /* addressable */
6742 if (is_mem_addressable ( address, szB, &bad_addr ))
6743 VG_(printf) ("Address %p len %lu addressable\n",
6744 (void *)address, szB);
6745 else
6746 VG_(printf)
6747 ("Address %p len %lu not addressable:\nbad address %p\n",
6748 (void *)address, szB, (void *) bad_addr);
6749 // Describe this (probably live) address with current epoch
6750 MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
6751 break;
6752 case 1: /* defined */
6753 res = is_mem_defined ( address, szB, &bad_addr, &otag );
6754 if (MC_AddrErr == res)
6755 VG_(printf)
6756 ("Address %p len %lu not addressable:\nbad address %p\n",
6757 (void *)address, szB, (void *) bad_addr);
6758 else if (MC_ValueErr == res) {
6759 okind = otag & 3;
6760 switch (okind) {
6761 case MC_OKIND_STACK:
6762 src = " was created by a stack allocation"; break;
6763 case MC_OKIND_HEAP:
6764 src = " was created by a heap allocation"; break;
6765 case MC_OKIND_USER:
6766 src = " was created by a client request"; break;
6767 case MC_OKIND_UNKNOWN:
6768 src = ""; break;
6769 default: tl_assert(0);
6771 VG_(printf)
6772 ("Address %p len %lu not defined:\n"
6773 "Uninitialised value at %p%s\n",
6774 (void *)address, szB, (void *) bad_addr, src);
6775 ecu = otag & ~3;
6776 if (VG_(is_plausible_ECU)(ecu)) {
6777 origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
6778 VG_(pp_ExeContext)( origin_ec );
6781 else
6782 VG_(printf) ("Address %p len %lu defined\n",
6783 (void *)address, szB);
6784 // Describe this (probably live) address with current epoch
6785 MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
6786 break;
6787 default: tl_assert(0);
6789 return True;
6792 case 5: { /* block_list */
6793 HChar* wl;
6794 HChar *the_end;
6795 UInt lr_nr_from = 0;
6796 UInt lr_nr_to = 0;
6798 if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
6799 UInt limit_blocks = 999999999;
6800 Int int_value;
6801 UInt heuristics = 0;
6803 for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
6804 wl != NULL;
6805 wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6806 switch (VG_(keyword_id) ("unlimited limited heuristics ",
6807 wl, kwd_report_all)) {
6808 case -2: return True;
6809 case -1: return True;
6810 case 0: /* unlimited */
6811 limit_blocks = 999999999; break;
6812 case 1: /* limited */
6813 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6814 if (wcmd == NULL) {
6815 VG_(gdb_printf) ("missing integer value\n");
6816 return True;
6818 int_value = VG_(strtoll10) (wcmd, &the_end);
6819 if (*the_end != '\0') {
6820 VG_(gdb_printf) ("malformed integer value\n");
6821 return True;
6823 if (int_value <= 0) {
6824 VG_(gdb_printf) ("max_blocks must be >= 1,"
6825 " got %d\n", int_value);
6826 return True;
6828 limit_blocks = (UInt) int_value;
6829 break;
6830 case 2: /* heuristics */
6831 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6832 if (wcmd == NULL
6833 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6834 True,/*allow_all*/
6835 wcmd,
6836 &heuristics)) {
6837 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6838 return True;
6840 break;
6841 default:
6842 tl_assert (0);
6845 /* subtract 1 from lr_nr_from/lr_nr_to as what is shown to the user
6846 is 1 more than the index in lr_array. */
6847 if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
6848 lr_nr_to-1,
6849 limit_blocks,
6850 heuristics))
6851 VG_(gdb_printf) ("invalid loss record nr\n");
6853 return True;
6856 case 6: { /* who_points_at */
6857 Addr address;
6858 SizeT szB = 1;
6860 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6861 return True;
6862 if (address == (Addr) 0) {
6863 VG_(gdb_printf) ("Cannot search who points at 0x0\n");
6864 return True;
6866 MC_(who_points_at) (address, szB);
6867 return True;
6870 case 7: { /* xb */
6871 Addr address;
6872 SizeT szB = 1;
6873 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6874 UChar vbits[8];
6875 Int res[8];
6876 Int i;
6877 Int unaddressable = 0;
6878 for (i = 0; i < szB; i++) {
6879 Int bnr = i % 8;
6880 res[bnr] = mc_get_or_set_vbits_for_client
6881 (address+i, (Addr) &vbits[bnr], 1,
6882 False, /* get them */
6883 False /* is client request */ );
6884 /* We are about to print the first vabits of a new line.
6885    If needed, terminate the previous line by printing a line with the
6886    address and the data. */
6887 if (bnr == 0) {
6888 if (i != 0) {
6889 VG_(printf) ("\n");
6890 gdb_xb (address + i - 8, 8, res);
6892 VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
6894 if (res[bnr] == 1) {
6895 VG_(printf) ("\t %02x", vbits[bnr]);
6896 } else {
6897 tl_assert(3 == res[bnr]);
6898 unaddressable++;
6899 VG_(printf) ("\t __");
6902 VG_(printf) ("\n");
6903 if (szB % 8 == 0 && szB > 0)
6904 gdb_xb (address + szB - 8, 8, res);
6905 else
6906 gdb_xb (address + szB - szB % 8, szB % 8, res);
6907 if (unaddressable) {
6908 VG_(printf)
6909 ("Address %p len %lu has %d bytes unaddressable\n",
6910 (void *)address, szB, unaddressable);
6913 return True;
6916 case 8: { /* xtmemory */
6917 HChar* filename;
6918 filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
6919 MC_(xtmemory_report)(filename, False);
6920 return True;
6923 default:
6924 tl_assert(0);
6925 return False;
6929 /*------------------------------------------------------------*/
6930 /*--- Client requests ---*/
6931 /*------------------------------------------------------------*/
6933 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
6935 Int i;
6936 Addr bad_addr;
6938 if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
6939 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
6940 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
6941 && VG_USERREQ__FREELIKE_BLOCK != arg[0]
6942 && VG_USERREQ__CREATE_MEMPOOL != arg[0]
6943 && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
6944 && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
6945 && VG_USERREQ__MEMPOOL_FREE != arg[0]
6946 && VG_USERREQ__MEMPOOL_TRIM != arg[0]
6947 && VG_USERREQ__MOVE_MEMPOOL != arg[0]
6948 && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
6949 && VG_USERREQ__MEMPOOL_EXISTS != arg[0]
6950 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]
6951 && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
6952 && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
6953 return False;
6955 switch (arg[0]) {
6956 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
6957 Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
6958 if (!ok)
6959 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
6960 *ret = ok ? (UWord)NULL : bad_addr;
6961 break;
6964 case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
6965 Bool errorV = False;
6966 Addr bad_addrV = 0;
6967 UInt otagV = 0;
6968 Bool errorA = False;
6969 Addr bad_addrA = 0;
6970 is_mem_defined_comprehensive(
6971 arg[1], arg[2],
6972 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
6974 if (errorV) {
6975 MC_(record_user_error) ( tid, bad_addrV,
6976 /*isAddrErr*/False, otagV );
6978 if (errorA) {
6979 MC_(record_user_error) ( tid, bad_addrA,
6980 /*isAddrErr*/True, 0 );
6982 /* Return the lower of the two erring addresses, if any. */
6983 *ret = 0;
6984 if (errorV && !errorA) {
6985 *ret = bad_addrV;
6987 if (!errorV && errorA) {
6988 *ret = bad_addrA;
6990 if (errorV && errorA) {
6991 *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
6993 break;
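/* Editor's illustration (client-side sketch, not compiled here): the
   request handled above is normally issued via the memcheck.h macro on a
   hypothetical buffer; it evaluates to 0 when the whole range is
   addressable and defined.

      char buf[16];
      // ... fill buf ...
      VALGRIND_CHECK_MEM_IS_DEFINED(buf, sizeof buf);
*/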
6996 case VG_USERREQ__DO_LEAK_CHECK: {
6997 LeakCheckParams lcp;
6999 if (arg[1] == 0)
7000 lcp.mode = LC_Full;
7001 else if (arg[1] == 1)
7002 lcp.mode = LC_Summary;
7003 else {
7004 VG_(message)(Vg_UserMsg,
7005 "Warning: unknown memcheck leak search mode\n");
7006 lcp.mode = LC_Full;
7009 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
7010 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
7011 lcp.heuristics = MC_(clo_leak_check_heuristics);
7013 if (arg[2] == 0)
7014 lcp.deltamode = LCD_Any;
7015 else if (arg[2] == 1)
7016 lcp.deltamode = LCD_Increased;
7017 else if (arg[2] == 2)
7018 lcp.deltamode = LCD_Changed;
7019 else {
7020 VG_(message)
7021 (Vg_UserMsg,
7022 "Warning: unknown memcheck leak search deltamode\n");
7023 lcp.deltamode = LCD_Any;
7025 lcp.max_loss_records_output = 999999999;
7026 lcp.requested_by_monitor_command = False;
7027 lcp.xt_filename = NULL;
7029 MC_(detect_memory_leaks)(tid, &lcp);
7030 *ret = 0; /* return value is meaningless */
7031 break;
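/* Editor's illustration (client-side sketch): the memcheck.h macros that
   reach this case; arg[1] selects full/summary and arg[2] the delta mode.

      VALGRIND_DO_LEAK_CHECK;           // full search, any delta
      VALGRIND_DO_QUICK_LEAK_CHECK;     // summary only
      VALGRIND_DO_ADDED_LEAK_CHECK;     // full, report increases only
      VALGRIND_DO_CHANGED_LEAK_CHECK;   // full, report changes
*/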
7034 case VG_USERREQ__MAKE_MEM_NOACCESS:
7035 MC_(make_mem_noaccess) ( arg[1], arg[2] );
7036 *ret = -1;
7037 break;
7039 case VG_USERREQ__MAKE_MEM_UNDEFINED:
7040 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
7041 MC_OKIND_USER );
7042 *ret = -1;
7043 break;
7045 case VG_USERREQ__MAKE_MEM_DEFINED:
7046 MC_(make_mem_defined) ( arg[1], arg[2] );
7047 *ret = -1;
7048 break;
7050 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
7051 make_mem_defined_if_addressable ( arg[1], arg[2] );
7052 *ret = -1;
7053 break;
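/* Editor's illustration (client-side sketch, hypothetical buffer): the
   four requests above correspond to these memcheck.h macros.

      VALGRIND_MAKE_MEM_NOACCESS(buf, 64);
      VALGRIND_MAKE_MEM_UNDEFINED(buf, 64);
      VALGRIND_MAKE_MEM_DEFINED(buf, 64);
      VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(buf, 64);
*/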
7055 case VG_USERREQ__CREATE_BLOCK: /* describe a block */
7056 if (arg[1] != 0 && arg[2] != 0) {
7057 i = alloc_client_block();
7058 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
7059 cgbs[i].start = arg[1];
7060 cgbs[i].size = arg[2];
7061 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
7062 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
7063 *ret = i;
7064 } else
7065 *ret = -1;
7066 break;
7068 case VG_USERREQ__DISCARD: /* discard */
7069 if (cgbs == NULL
7070 || arg[2] >= cgb_used ||
7071 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
7072 *ret = 1;
7073 } else {
7074 tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
7075 cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
7076 VG_(free)(cgbs[arg[2]].desc);
7077 cgb_discards++;
7078 *ret = 0;
7080 break;
7082 case VG_USERREQ__GET_VBITS:
7083 *ret = mc_get_or_set_vbits_for_client
7084 ( arg[1], arg[2], arg[3],
7085 False /* get them */,
7086 True /* is client request */ );
7087 break;
7089 case VG_USERREQ__SET_VBITS:
7090 *ret = mc_get_or_set_vbits_for_client
7091 ( arg[1], arg[2], arg[3],
7092 True /* set them */,
7093 True /* is client request */ );
7094 break;
7096 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
7097 UWord** argp = (UWord**)arg;
7098 // MC_(bytes_leaked) et al were set by the last leak check (or zero
7099 // if no prior leak checks performed).
7100 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
7101 *argp[2] = MC_(bytes_dubious);
7102 *argp[3] = MC_(bytes_reachable);
7103 *argp[4] = MC_(bytes_suppressed);
7104 // there is no argp[5]
7105 //*argp[5] = MC_(bytes_indirect);
7106 // XXX need to make *argp[1-4] defined; currently done in the
7107 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
7108 *ret = 0;
7109 return True;
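/* Editor's illustration (client-side sketch): the usual way this request
   is issued; the macro itself zero-initialises the four output variables
   before the call, as noted above.

      unsigned long leaked, dubious, reachable, suppressed;
      VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
*/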
7111 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
7112 UWord** argp = (UWord**)arg;
7113 // MC_(blocks_leaked) et al were set by the last leak check (or zero
7114 // if no prior leak checks performed).
7115 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
7116 *argp[2] = MC_(blocks_dubious);
7117 *argp[3] = MC_(blocks_reachable);
7118 *argp[4] = MC_(blocks_suppressed);
7119 // there is no argp[5]
7120 //*argp[5] = MC_(blocks_indirect);
7121 // XXX need to make *argp[1-4] defined; currently done in the
7122 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
7123 *ret = 0;
7124 return True;
7126 case VG_USERREQ__MALLOCLIKE_BLOCK: {
7127 Addr p = (Addr)arg[1];
7128 SizeT sizeB = arg[2];
7129 UInt rzB = arg[3];
7130 Bool is_zeroed = (Bool)arg[4];
7132 MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
7133 MC_AllocCustom, MC_(malloc_list) );
7134 if (rzB > 0) {
7135 MC_(make_mem_noaccess) ( p - rzB, rzB);
7136 MC_(make_mem_noaccess) ( p + sizeB, rzB);
7138 return True;
7140 case VG_USERREQ__RESIZEINPLACE_BLOCK: {
7141 Addr p = (Addr)arg[1];
7142 SizeT oldSizeB = arg[2];
7143 SizeT newSizeB = arg[3];
7144 UInt rzB = arg[4];
7146 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
7147 return True;
7149 case VG_USERREQ__FREELIKE_BLOCK: {
7150 Addr p = (Addr)arg[1];
7151 UInt rzB = arg[2];
7153 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
7154 return True;
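/* Editor's illustration (client-side sketch, hypothetical allocator):
   how a custom allocator announces its blocks so that the three cases
   above see them.

      void* p = my_pool_alloc(pool, 100);             // hypothetical
      VALGRIND_MALLOCLIKE_BLOCK(p, 100, 16, 0);       // rzB=16, not zeroed
      // ... later, if the block is grown or shrunk in place ...
      VALGRIND_RESIZEINPLACE_BLOCK(p, 100, 150, 16);
      // ... and finally ...
      VALGRIND_FREELIKE_BLOCK(p, 16);
      my_pool_free(pool, p);                          // hypothetical
*/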
7157 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
7158 HChar* s = (HChar*)arg[1];
7159 Addr dst = (Addr) arg[2];
7160 Addr src = (Addr) arg[3];
7161 SizeT len = (SizeT)arg[4];
7162 MC_(record_overlap_error)(tid, s, src, dst, len);
7163 return True;
7166 case VG_USERREQ__CREATE_MEMPOOL: {
7167 Addr pool = (Addr)arg[1];
7168 UInt rzB = arg[2];
7169 Bool is_zeroed = (Bool)arg[3];
7170 UInt flags = arg[4];
7172 // The create_mempool function does not know about these mempool flags,
7173 // so pass them as booleans.
7174 MC_(create_mempool) ( pool, rzB, is_zeroed,
7175 (flags & VALGRIND_MEMPOOL_AUTO_FREE),
7176 (flags & VALGRIND_MEMPOOL_METAPOOL) );
7177 return True;
7180 case VG_USERREQ__DESTROY_MEMPOOL: {
7181 Addr pool = (Addr)arg[1];
7183 MC_(destroy_mempool) ( pool );
7184 return True;
7187 case VG_USERREQ__MEMPOOL_ALLOC: {
7188 Addr pool = (Addr)arg[1];
7189 Addr addr = (Addr)arg[2];
7190 UInt size = arg[3];
7192 MC_(mempool_alloc) ( tid, pool, addr, size );
7193 return True;
7196 case VG_USERREQ__MEMPOOL_FREE: {
7197 Addr pool = (Addr)arg[1];
7198 Addr addr = (Addr)arg[2];
7200 MC_(mempool_free) ( pool, addr );
7201 return True;
7204 case VG_USERREQ__MEMPOOL_TRIM: {
7205 Addr pool = (Addr)arg[1];
7206 Addr addr = (Addr)arg[2];
7207 UInt size = arg[3];
7209 MC_(mempool_trim) ( pool, addr, size );
7210 return True;
7213 case VG_USERREQ__MOVE_MEMPOOL: {
7214 Addr poolA = (Addr)arg[1];
7215 Addr poolB = (Addr)arg[2];
7217 MC_(move_mempool) ( poolA, poolB );
7218 return True;
7221 case VG_USERREQ__MEMPOOL_CHANGE: {
7222 Addr pool = (Addr)arg[1];
7223 Addr addrA = (Addr)arg[2];
7224 Addr addrB = (Addr)arg[3];
7225 UInt size = arg[4];
7227 MC_(mempool_change) ( pool, addrA, addrB, size );
7228 return True;
7231 case VG_USERREQ__MEMPOOL_EXISTS: {
7232 Addr pool = (Addr)arg[1];
7234 *ret = (UWord) MC_(mempool_exists) ( pool );
7235 return True;
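/* Editor's illustration (client-side sketch, hypothetical pool): the
   mempool protocol driving the requests above.  A flags-taking variant
   (presumably VALGRIND_CREATE_MEMPOOL_EXT) is what supplies arg[4] to the
   CREATE_MEMPOOL case.

      VALGRIND_CREATE_MEMPOOL(pool, 0, 0);     // rzB=0, not zeroed
      VALGRIND_MEMPOOL_ALLOC(pool, obj, 32);
      // ... use obj ...
      VALGRIND_MEMPOOL_FREE(pool, obj);
      VALGRIND_DESTROY_MEMPOOL(pool);
*/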
7238 case VG_USERREQ__GDB_MONITOR_COMMAND: {
7239 Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
7240 if (handled)
7241 *ret = 1;
7242 else
7243 *ret = 0;
7244 return handled;
7247 case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
7248 case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
7249 Bool addRange
7250 = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
7251 Bool ok
7252 = modify_ignore_ranges(addRange, arg[1], arg[2]);
7253 *ret = ok ? 1 : 0;
7254 return True;
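/* Editor's illustration (client-side sketch, hypothetical range): the
   valgrind.h macros that issue the two requests above, e.g. around
   deliberate pokes at device-mapped memory.

      VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(addr, len);
      // ... accesses in [addr, addr+len) no longer report addr errors ...
      VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(addr, len);
*/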
7257 default:
7258 VG_(message)(
7259 Vg_UserMsg,
7260 "Warning: unknown memcheck client request code %llx\n",
7261 (ULong)arg[0]
7263 return False;
7265 return True;
7269 /*------------------------------------------------------------*/
7270 /*--- Crude profiling machinery. ---*/
7271 /*------------------------------------------------------------*/
7273 // We track a number of interesting events (using PROF_EVENT)
7274 // if MC_PROFILE_MEMORY is defined.
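// Editor's note (illustrative): a fast-path helper typically records an
// event with something like
//    PROF_EVENT(MCPE_LOADV32);
// which only has an effect when MC_PROFILE_MEMORY is defined.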
7276 #ifdef MC_PROFILE_MEMORY
7278 ULong MC_(event_ctr)[MCPE_LAST];
7280 /* Event counter names. Use the name of the function that increases the
7281 event counter. Drop any MC_() and mc_ prefixes. */
7282 static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
7283 [MCPE_LOADVN_SLOW] = "LOADVn_slow",
7284 [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
7285 [MCPE_STOREVN_SLOW] = "STOREVn_slow",
7286 [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
7287 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
7288 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
7289 "make_aligned_word32_undefined_slow",
7290 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
7291 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
7292 "make_aligned_word64_undefined_slow",
7293 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
7294 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
7295 "make_aligned_word32_noaccess_slow",
7296 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
7297 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
7298 "make_aligned_word64_noaccess_slow",
7299 [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
7300 [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
7301 [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
7302 [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
7303 [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
7304 [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
7305 [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
7306 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
7307 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
7308 [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
7309 [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
7310 [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
7311 [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
7312 [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
7313 [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
7314 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
7315 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
7316 "is_mem_defined_comprehensive(loop)",
7317 [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
7318 [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
7319 [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
7320 [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
7321 [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
7322 [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
7323 "set_address_range_perms(single-secmap)",
7324 [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
7325 "set_address_range_perms(startof-secmap)",
7326 [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
7327 "set_address_range_perms(multiple-secmaps)",
7328 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
7329 "set_address_range_perms(dist-sm1)",
7330 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
7331 "set_address_range_perms(dist-sm2)",
7332 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
7333 "set_address_range_perms(dist-sm1-quick)",
7334 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
7335 "set_address_range_perms(dist-sm2-quick)",
7336 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
7337 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
7338 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
7339 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
7340 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
7341 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
7342 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
7343 "set_address_range_perms(loop64K-free-dist-sm)",
7344 [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
7345 [MCPE_LOADV_128_OR_256] = "LOADV_128_or_256",
7346 [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
7347 [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
7348 [MCPE_LOADV64] = "LOADV64",
7349 [MCPE_LOADV64_SLOW1] = "LOADV64-slow1",
7350 [MCPE_LOADV64_SLOW2] = "LOADV64-slow2",
7351 [MCPE_STOREV64] = "STOREV64",
7352 [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
7353 [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
7354 [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
7355 [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
7356 [MCPE_LOADV32] = "LOADV32",
7357 [MCPE_LOADV32_SLOW1] = "LOADV32-slow1",
7358 [MCPE_LOADV32_SLOW2] = "LOADV32-slow2",
7359 [MCPE_STOREV32] = "STOREV32",
7360 [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
7361 [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
7362 [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
7363 [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
7364 [MCPE_LOADV16] = "LOADV16",
7365 [MCPE_LOADV16_SLOW1] = "LOADV16-slow1",
7366 [MCPE_LOADV16_SLOW2] = "LOADV16-slow2",
7367 [MCPE_STOREV16] = "STOREV16",
7368 [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
7369 [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
7370 [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
7371 [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
7372 [MCPE_LOADV8] = "LOADV8",
7373 [MCPE_LOADV8_SLOW1] = "LOADV8-slow1",
7374 [MCPE_LOADV8_SLOW2] = "LOADV8-slow2",
7375 [MCPE_STOREV8] = "STOREV8",
7376 [MCPE_STOREV8_SLOW1] = "STOREV8-slow1",
7377 [MCPE_STOREV8_SLOW2] = "STOREV8-slow2",
7378 [MCPE_STOREV8_SLOW3] = "STOREV8-slow3",
7379 [MCPE_STOREV8_SLOW4] = "STOREV8-slow4",
7380 [MCPE_NEW_MEM_STACK_4] = "new_mem_stack_4",
7381 [MCPE_NEW_MEM_STACK_8] = "new_mem_stack_8",
7382 [MCPE_NEW_MEM_STACK_12] = "new_mem_stack_12",
7383 [MCPE_NEW_MEM_STACK_16] = "new_mem_stack_16",
7384 [MCPE_NEW_MEM_STACK_32] = "new_mem_stack_32",
7385 [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
7386 [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
7387 [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
7388 [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
7389 [MCPE_DIE_MEM_STACK_4] = "die_mem_stack_4",
7390 [MCPE_DIE_MEM_STACK_8] = "die_mem_stack_8",
7391 [MCPE_DIE_MEM_STACK_12] = "die_mem_stack_12",
7392 [MCPE_DIE_MEM_STACK_16] = "die_mem_stack_16",
7393 [MCPE_DIE_MEM_STACK_32] = "die_mem_stack_32",
7394 [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
7395 [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
7396 [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
7397 [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
7398 [MCPE_NEW_MEM_STACK] = "new_mem_stack",
7399 [MCPE_DIE_MEM_STACK] = "die_mem_stack",
7400 [MCPE_MAKE_STACK_UNINIT_W_O] = "MAKE_STACK_UNINIT_w_o",
7401 [MCPE_MAKE_STACK_UNINIT_NO_O] = "MAKE_STACK_UNINIT_no_o",
7402 [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
7403 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
7404 = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
7405 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
7406 = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
7407 [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
7408 = "MAKE_STACK_UNINIT_128_no_o_slowcase",
7411 static void init_prof_mem ( void )
7413 Int i, name_count = 0;
7415 for (i = 0; i < MCPE_LAST; i++) {
7416 MC_(event_ctr)[i] = 0;
7417 if (MC_(event_ctr_name)[i] != NULL)
7418 ++name_count;
7421 /* Make sure every profiling event has a name */
7422 tl_assert(name_count == MCPE_LAST);
7425 static void done_prof_mem ( void )
7427 Int i, n;
7428 Bool spaced = False;
7429 for (i = n = 0; i < MCPE_LAST; i++) {
7430 if (!spaced && (n % 10) == 0) {
7431 VG_(printf)("\n");
7432 spaced = True;
7434 if (MC_(event_ctr)[i] > 0) {
7435 spaced = False;
7436 ++n;
7437 VG_(printf)( "prof mem event %3d: %11llu %s\n",
7438 i, MC_(event_ctr)[i],
7439 MC_(event_ctr_name)[i]);
7444 #else
7446 static void init_prof_mem ( void ) { }
7447 static void done_prof_mem ( void ) { }
7449 #endif
7452 /*------------------------------------------------------------*/
7453 /*--- Origin tracking stuff ---*/
7454 /*------------------------------------------------------------*/
7456 /*--------------------------------------------*/
7457 /*--- Origin tracking: load handlers ---*/
7458 /*--------------------------------------------*/
7460 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
7461 return or1 > or2 ? or1 : or2;
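/* Editor's note: an origin tag of 0 means "no origin information", so
   taking the larger value lets any real (nonzero) tag survive a merge
   with 0, e.g. merge_origins(0, otag) == otag.  Between two nonzero tags
   the choice is arbitrary but deterministic. */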
7464 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
7465 OCacheLine* line;
7466 UChar descr;
7467 UWord lineoff = oc_line_offset(a);
7468 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7470 if (OC_ENABLE_ASSERTIONS) {
7471 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7474 line = find_OCacheLine( a );
7476 descr = line->descr[lineoff];
7477 if (OC_ENABLE_ASSERTIONS) {
7478 tl_assert(descr < 0x10);
7481 if (LIKELY(0 == (descr & (1 << byteoff)))) {
7482 return 0;
7483 } else {
7484 return line->w32[lineoff];
7488 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
7489 OCacheLine* line;
7490 UChar descr;
7491 UWord lineoff, byteoff;
7493 if (UNLIKELY(a & 1)) {
7494 /* Handle misaligned case, slowly. */
7495 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
7496 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
7497 return merge_origins(oLo, oHi);
7500 lineoff = oc_line_offset(a);
7501 byteoff = a & 3; /* 0 or 2 */
7503 if (OC_ENABLE_ASSERTIONS) {
7504 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7506 line = find_OCacheLine( a );
7508 descr = line->descr[lineoff];
7509 if (OC_ENABLE_ASSERTIONS) {
7510 tl_assert(descr < 0x10);
7513 if (LIKELY(0 == (descr & (3 << byteoff)))) {
7514 return 0;
7515 } else {
7516 return line->w32[lineoff];
7520 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
7521 OCacheLine* line;
7522 UChar descr;
7523 UWord lineoff;
7525 if (UNLIKELY(a & 3)) {
7526 /* Handle misaligned case, slowly. */
7527 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
7528 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
7529 return merge_origins(oLo, oHi);
7532 lineoff = oc_line_offset(a);
7533 if (OC_ENABLE_ASSERTIONS) {
7534 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7537 line = find_OCacheLine( a );
7539 descr = line->descr[lineoff];
7540 if (OC_ENABLE_ASSERTIONS) {
7541 tl_assert(descr < 0x10);
7544 if (LIKELY(0 == descr)) {
7545 return 0;
7546 } else {
7547 return line->w32[lineoff];
7551 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
7552 OCacheLine* line;
7553 UChar descrLo, descrHi, descr;
7554 UWord lineoff;
7556 if (UNLIKELY(a & 7)) {
7557 /* Handle misaligned case, slowly. */
7558 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
7559 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
7560 return merge_origins(oLo, oHi);
7563 lineoff = oc_line_offset(a);
7564 if (OC_ENABLE_ASSERTIONS) {
7565 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7568 line = find_OCacheLine( a );
7570 descrLo = line->descr[lineoff + 0];
7571 descrHi = line->descr[lineoff + 1];
7572 descr = descrLo | descrHi;
7573 if (OC_ENABLE_ASSERTIONS) {
7574 tl_assert(descr < 0x10);
7577 if (LIKELY(0 == descr)) {
7578 return 0; /* both 32-bit chunks are defined */
7579 } else {
7580 UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
7581 UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
7582 return merge_origins(oLo, oHi);
7586 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
7587 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 );
7588 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 );
7589 UInt oBoth = merge_origins(oLo, oHi);
7590 return (UWord)oBoth;
7593 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
7594 UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 );
7595 UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 );
7596 UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 );
7597 UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 );
7598 UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
7599 merge_origins(oQ2, oQ3));
7600 return (UWord)oAll;
7604 /*--------------------------------------------*/
7605 /*--- Origin tracking: store handlers ---*/
7606 /*--------------------------------------------*/
7608 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
7609 OCacheLine* line;
7610 UWord lineoff = oc_line_offset(a);
7611 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7613 if (OC_ENABLE_ASSERTIONS) {
7614 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7617 line = find_OCacheLine( a );
7619 if (d32 == 0) {
7620 line->descr[lineoff] &= ~(1 << byteoff);
7621 } else {
7622 line->descr[lineoff] |= (1 << byteoff);
7623 line->w32[lineoff] = d32;
7627 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
7628 OCacheLine* line;
7629 UWord lineoff, byteoff;
7631 if (UNLIKELY(a & 1)) {
7632 /* Handle misaligned case, slowly. */
7633 MC_(helperc_b_store1)( a + 0, d32 );
7634 MC_(helperc_b_store1)( a + 1, d32 );
7635 return;
7638 lineoff = oc_line_offset(a);
7639 byteoff = a & 3; /* 0 or 2 */
7641 if (OC_ENABLE_ASSERTIONS) {
7642 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7645 line = find_OCacheLine( a );
7647 if (d32 == 0) {
7648 line->descr[lineoff] &= ~(3 << byteoff);
7649 } else {
7650 line->descr[lineoff] |= (3 << byteoff);
7651 line->w32[lineoff] = d32;
7655 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
7656 OCacheLine* line;
7657 UWord lineoff;
7659 if (UNLIKELY(a & 3)) {
7660 /* Handle misaligned case, slowly. */
7661 MC_(helperc_b_store2)( a + 0, d32 );
7662 MC_(helperc_b_store2)( a + 2, d32 );
7663 return;
7666 lineoff = oc_line_offset(a);
7667 if (OC_ENABLE_ASSERTIONS) {
7668 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7671 line = find_OCacheLine( a );
7673 if (d32 == 0) {
7674 line->descr[lineoff] = 0;
7675 } else {
7676 line->descr[lineoff] = 0xF;
7677 line->w32[lineoff] = d32;
7681 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
7682 OCacheLine* line;
7683 UWord lineoff;
7685 if (UNLIKELY(a & 7)) {
7686 /* Handle misaligned case, slowly. */
7687 MC_(helperc_b_store4)( a + 0, d32 );
7688 MC_(helperc_b_store4)( a + 4, d32 );
7689 return;
7692 lineoff = oc_line_offset(a);
7693 if (OC_ENABLE_ASSERTIONS) {
7694 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7697 line = find_OCacheLine( a );
7699 if (d32 == 0) {
7700 line->descr[lineoff + 0] = 0;
7701 line->descr[lineoff + 1] = 0;
7702 } else {
7703 line->descr[lineoff + 0] = 0xF;
7704 line->descr[lineoff + 1] = 0xF;
7705 line->w32[lineoff + 0] = d32;
7706 line->w32[lineoff + 1] = d32;
7710 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
7711 MC_(helperc_b_store8)( a + 0, d32 );
7712 MC_(helperc_b_store8)( a + 8, d32 );
7715 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
7716 MC_(helperc_b_store8)( a + 0, d32 );
7717 MC_(helperc_b_store8)( a + 8, d32 );
7718 MC_(helperc_b_store8)( a + 16, d32 );
7719 MC_(helperc_b_store8)( a + 24, d32 );
7723 /*--------------------------------------------*/
7724 /*--- Origin tracking: sarp handlers ---*/
7725 /*--------------------------------------------*/
7727 __attribute__((noinline))
7728 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
7729 if ((a & 1) && len >= 1) {
7730 MC_(helperc_b_store1)( a, otag );
7731 a++;
7732 len--;
7734 if ((a & 2) && len >= 2) {
7735 MC_(helperc_b_store2)( a, otag );
7736 a += 2;
7737 len -= 2;
7739 if (len >= 4)
7740 tl_assert(0 == (a & 3));
7741 while (len >= 4) {
7742 MC_(helperc_b_store4)( a, otag );
7743 a += 4;
7744 len -= 4;
7746 if (len >= 2) {
7747 MC_(helperc_b_store2)( a, otag );
7748 a += 2;
7749 len -= 2;
7751 if (len >= 1) {
7752 MC_(helperc_b_store1)( a, otag );
7753 //a++;
7754 len--;
7756 tl_assert(len == 0);
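/* Editor's worked example (hypothetical numbers): for a = 0x1003, len = 9
   the peeling above issues store1(0x1003), then store4(0x1004) and
   store4(0x1008); the address is 4-aligned before the main loop, and the
   trailing len >= 2 / len >= 1 cases are not needed for this input. */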
7759 __attribute__((noinline))
7760 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
7761 if ((a & 1) && len >= 1) {
7762 MC_(helperc_b_store1)( a, 0 );
7763 a++;
7764 len--;
7766 if ((a & 2) && len >= 2) {
7767 MC_(helperc_b_store2)( a, 0 );
7768 a += 2;
7769 len -= 2;
7771 if (len >= 4)
7772 tl_assert(0 == (a & 3));
7773 while (len >= 4) {
7774 MC_(helperc_b_store4)( a, 0 );
7775 a += 4;
7776 len -= 4;
7778 if (len >= 2) {
7779 MC_(helperc_b_store2)( a, 0 );
7780 a += 2;
7781 len -= 2;
7783 if (len >= 1) {
7784 MC_(helperc_b_store1)( a, 0 );
7785 //a++;
7786 len--;
7788 tl_assert(len == 0);
7792 /*------------------------------------------------------------*/
7793 /*--- Setup and finalisation ---*/
7794 /*------------------------------------------------------------*/
7796 static void mc_post_clo_init ( void )
7798 /* If we've been asked to emit XML, mash around various other
7799 options so as to constrain the output somewhat. */
7800 if (VG_(clo_xml)) {
7801 /* Extract as much info as possible from the leak checker. */
7802 MC_(clo_leak_check) = LC_Full;
7805 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)
7806 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7807 VG_(message)(Vg_UserMsg,
7808 "Warning: --freelist-big-blocks value %lld has no effect\n"
7809 "as it is >= to --freelist-vol value %lld\n",
7810 MC_(clo_freelist_big_blocks),
7811 MC_(clo_freelist_vol));
7814 if (MC_(clo_workaround_gcc296_bugs)
7815 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7816 VG_(umsg)(
7817 "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
7818 "Warning: Instead use: --ignore-range-below-sp=1024-1\n"
7819 "\n"
7823 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
7825 if (MC_(clo_mc_level) == 3) {
7826 /* We're doing origin tracking. */
7827 # ifdef PERF_FAST_STACK
7828 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU );
7829 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU );
7830 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU );
7831 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU );
7832 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU );
7833 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
7834 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
7835 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
7836 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
7837 # endif
7838 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU );
7839 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_make_ECU );
7840 } else {
7841 /* Not doing origin tracking */
7842 # ifdef PERF_FAST_STACK
7843 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
7844 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 );
7845 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 );
7846 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 );
7847 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 );
7848 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
7849 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
7850 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
7851 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
7852 # endif
7853 VG_(track_new_mem_stack) ( mc_new_mem_stack );
7854 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
7857 // We assume that brk()/sbrk() does not initialise new memory. Is this
7858 // accurate? John Reiser says:
7860 // 0) sbrk() can *decrease* process address space. No zero fill is done
7861 // for a decrease, not even the fragment on the high end of the last page
7862 // that is beyond the new highest address. For maximum safety and
7863 // portability, then the bytes in the last page that reside above [the
7864 // new] sbrk(0) should be considered to be uninitialized, but in practice
7865 // it is exceedingly likely that they will retain their previous
7866 // contents.
7868 // 1) If an increase is large enough to require new whole pages, then
7869 // those new whole pages (like all new pages) are zero-filled by the
7870 // operating system. So if sbrk(0) already is page aligned, then
7871 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
7873 // 2) Any increase that lies within an existing allocated page is not
7874 // changed. So if (x = sbrk(0)) is not page aligned, then
7875 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
7876 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
7877 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
7878 // of them come along for the ride because the operating system deals
7879 // only in whole pages. Again, for maximum safety and portability, then
7880 // anything that lives above [the new] sbrk(0) should be considered
7881 // uninitialized, but in practice will retain previous contents [zero in
7882 // this case.]"
7884 // In short:
7886 // A key property of sbrk/brk is that new whole pages that are supplied
7887 // by the operating system *do* get initialized to zero.
7889 // As for the portability of all this:
7891 // sbrk and brk are not POSIX. However, any system that is a derivative
7892 // of *nix has sbrk and brk because there are too many software (such as
7893 // the Bourne shell) which rely on the traditional memory map (.text,
7894 // .data+.bss, stack) and the existence of sbrk/brk.
7896 // So we should arguably observe all this. However:
7897 // - The current inaccuracy has caused maybe one complaint in seven years(?)
7898 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
7899 // doubt most programmers know the above information.
7900 // So I'm not terribly unhappy with marking it as undefined. --njn.
7902 // [More: I think most of what John said only applies to sbrk(). It seems
7903 // that brk() always deals in whole pages. And since this event deals
7904 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
7905 // just mark all memory it allocates as defined.]
7907 # if !defined(VGO_solaris)
7908 if (MC_(clo_mc_level) == 3)
7909 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_make_ECU );
7910 else
7911 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_no_ECU );
7912 # else
7913 // On Solaris, brk memory has to be marked as defined, otherwise we get
7914 // many false positives.
7915 VG_(track_new_mem_brk) ( make_mem_defined_w_tid );
7916 # endif
7918 /* This origin tracking cache is huge (~100M), so only initialise
7919 if we need it. */
7920 if (MC_(clo_mc_level) >= 3) {
7921 init_OCache();
7922 tl_assert(ocacheL1 != NULL);
7923 tl_assert(ocacheL2 != NULL);
7924 } else {
7925 tl_assert(ocacheL1 == NULL);
7926 tl_assert(ocacheL2 == NULL);
7929 MC_(chunk_poolalloc) = VG_(newPA)
7930 (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
7931 1000,
7932 VG_(malloc),
7933 "mc.cMC.1 (MC_Chunk pools)",
7934 VG_(free));
7936 /* Do not check definedness of guest state if --undef-value-errors=no */
7937 if (MC_(clo_mc_level) >= 2)
7938 VG_(track_pre_reg_read) ( mc_pre_reg_read );
7940 if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
7941 if (MC_(clo_keep_stacktraces) == KS_none
7942 || MC_(clo_keep_stacktraces) == KS_free)
7943 VG_(fmsg_bad_option)("--keep-stacktraces",
7944 "To use --xtree-memory=full, you must"
7945 " keep at least the alloc stacktrace\n");
7946 // Activate full xtree memory profiling.
7947 VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main));
7952 static void print_SM_info(const HChar* type, Int n_SMs)
7954 VG_(message)(Vg_DebugMsg,
7955 " memcheck: SMs: %s = %d (%luk, %luM)\n",
7956 type,
7957 n_SMs,
7958 n_SMs * sizeof(SecMap) / 1024UL,
7959 n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
7962 static void mc_print_stats (void)
7964 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
7966 VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
7967 VG_(free_queue_volume), VG_(free_queue_length));
7968 VG_(message)(Vg_DebugMsg,
7969 " memcheck: sanity checks: %d cheap, %d expensive\n",
7970 n_sanity_cheap, n_sanity_expensive );
7971 VG_(message)(Vg_DebugMsg,
7972 " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
7973 n_auxmap_L2_nodes,
7974 n_auxmap_L2_nodes * 64,
7975 n_auxmap_L2_nodes / 16 );
7976 VG_(message)(Vg_DebugMsg,
7977 " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
7978 n_auxmap_L1_searches, n_auxmap_L1_cmps,
7979 (10ULL * n_auxmap_L1_cmps)
7980 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
7982 VG_(message)(Vg_DebugMsg,
7983 " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
7984 n_auxmap_L2_searches, n_auxmap_L2_nodes
7987 print_SM_info("n_issued ", n_issued_SMs);
7988 print_SM_info("n_deissued ", n_deissued_SMs);
7989 print_SM_info("max_noaccess ", max_noaccess_SMs);
7990 print_SM_info("max_undefined", max_undefined_SMs);
7991 print_SM_info("max_defined ", max_defined_SMs);
7992 print_SM_info("max_non_DSM ", max_non_DSM_SMs);
7994 // Three DSMs, plus the non-DSM ones
7995 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
7996 // The 3*sizeof(Word) bytes are the AVL node metadata size.
7997 // The VG_ROUNDUP is because the OSet pool allocator will/must align
7998 // the elements on pointer size.
7999 // Note that the pool allocator has some additional small overhead
8000 // which is not counted in the below.
8001 // Hardwiring this logic sucks, but I don't see how else to do it.
8002 max_secVBit_szB = max_secVBit_nodes *
8003 (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
8004 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
8006 VG_(message)(Vg_DebugMsg,
8007 " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
8008 max_secVBit_nodes, max_secVBit_szB / 1024,
8009 max_secVBit_szB / (1024 * 1024));
8010 VG_(message)(Vg_DebugMsg,
8011 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
8012 sec_vbits_new_nodes + sec_vbits_updates,
8013 sec_vbits_new_nodes, sec_vbits_updates );
8014 VG_(message)(Vg_DebugMsg,
8015 " memcheck: max shadow mem size: %luk, %luM\n",
8016 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
8018 if (MC_(clo_mc_level) >= 3) {
8019 VG_(message)(Vg_DebugMsg,
8020 " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
8021 stats_ocacheL1_find,
8022 stats_ocacheL1_misses,
8023 stats_ocacheL1_lossage );
8024 VG_(message)(Vg_DebugMsg,
8025 " ocacheL1: %'12lu at 0 %'12lu at 1\n",
8026 stats_ocacheL1_find - stats_ocacheL1_misses
8027 - stats_ocacheL1_found_at_1
8028 - stats_ocacheL1_found_at_N,
8029 stats_ocacheL1_found_at_1 );
8030 VG_(message)(Vg_DebugMsg,
8031 " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
8032 stats_ocacheL1_found_at_N,
8033 stats_ocacheL1_movefwds );
8034 VG_(message)(Vg_DebugMsg,
8035 " ocacheL1: %'12lu sizeB %'12d useful\n",
8036 (SizeT)sizeof(OCache),
8037 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
8038 VG_(message)(Vg_DebugMsg,
8039 " ocacheL2: %'12lu refs %'12lu misses\n",
8040 stats__ocacheL2_refs,
8041 stats__ocacheL2_misses );
8042 VG_(message)(Vg_DebugMsg,
8043 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
8044 stats__ocacheL2_n_nodes_max,
8045 stats__ocacheL2_n_nodes );
8046 VG_(message)(Vg_DebugMsg,
8047 " niacache: %'12lu refs %'12lu misses\n",
8048 stats__nia_cache_queries, stats__nia_cache_misses);
8049 } else {
8050 tl_assert(ocacheL1 == NULL);
8051 tl_assert(ocacheL2 == NULL);
8056 static void mc_fini ( Int exitcode )
8058 MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True);
8059 MC_(print_malloc_stats)();
8061 if (MC_(clo_leak_check) != LC_Off) {
8062 LeakCheckParams lcp;
8063 HChar* xt_filename = NULL;
8064 lcp.mode = MC_(clo_leak_check);
8065 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
8066 lcp.heuristics = MC_(clo_leak_check_heuristics);
8067 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
8068 lcp.deltamode = LCD_Any;
8069 lcp.max_loss_records_output = 999999999;
8070 lcp.requested_by_monitor_command = False;
8071 if (MC_(clo_xtree_leak)) {
8072 xt_filename = VG_(expand_file_name)("--xtree-leak-file",
8073 MC_(clo_xtree_leak_file));
8074 lcp.xt_filename = xt_filename;
8075 lcp.mode = LC_Full;
8077 else
8078 lcp.xt_filename = NULL;
8079 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
8080 if (MC_(clo_xtree_leak))
8081 VG_(free)(xt_filename);
8082 } else {
8083 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8084 VG_(umsg)(
8085 "For a detailed leak analysis, rerun with: --leak-check=full\n"
8086 "\n"
8091 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8092 VG_(message)(Vg_UserMsg,
8093 "For counts of detected and suppressed errors, rerun with: -v\n");
8096 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
8097 && MC_(clo_mc_level) == 2) {
8098 VG_(message)(Vg_UserMsg,
8099 "Use --track-origins=yes to see where "
8100 "uninitialised values come from\n");
8103 /* Print a warning if any client-request generated ignore-ranges
8104 still exist. It would be reasonable to expect that a properly
8105 written program would remove any such ranges before exiting, and
8106 since they are a bit on the dangerous side, we print a warning. By
8107 contrast ranges which are specified on the command line normally
8108 pertain to hardware mapped into the address space, and so we
8109 can't expect the client to have got rid of them. */
8110 if (gIgnoredAddressRanges) {
8111 UInt i, nBad = 0;
8112 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
8113 UWord val = IAR_INVALID;
8114 UWord key_min = ~(UWord)0;
8115 UWord key_max = (UWord)0;
8116 VG_(indexRangeMap)( &key_min, &key_max, &val,
8117 gIgnoredAddressRanges, i );
8118 if (val != IAR_ClientReq)
8119 continue;
8120 /* Print the offending range. Also, if it is the first,
8121 print a banner before it. */
8122 nBad++;
8123 if (nBad == 1) {
8124 VG_(umsg)(
8125 "WARNING: exiting program has the following client-requested\n"
8126 "WARNING: address error disablement range(s) still in force,\n"
8127 "WARNING: "
8128 "possibly as a result of some mistake in the use of the\n"
8129 "WARNING: "
8130 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
8131 );
8132 }
8133 VG_(umsg)(" [%u] 0x%016lx-0x%016lx %s\n",
8134 i, key_min, key_max, showIARKind(val));
8135 }
8136 }
8137
8138 done_prof_mem();
8140 if (VG_(clo_stats))
8141 mc_print_stats();
8143 if (0) {
8144 VG_(message)(Vg_DebugMsg,
8145 "------ Valgrind's client block stats follow ---------------\n" );
8146 show_client_block_stats();
8147 }
8148 }
8149
8150 /* Mark the given addr/len unaddressable for the watchpoint
8151 implementation. The PointKind will be handled at access time. */
8152 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
8153 Addr addr, SizeT len)
8154 {
8155 /* GDBTD: this is somewhat fishy. Ideally we would save the previous
8156 accessibility and definedness in gdbserver so that they could be
8157 restored properly when the watchpoint is removed. Currently we assume
8158 that the user only watches things which are properly addressable and defined. */
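/* Rough sketch of the mechanism: marking the watched range no-access
   sends every client access to it down Memcheck's addressability-error
   path, where the gdbserver watchpoint check (via the PointKind) gets a
   chance to intercept it; un-watching simply re-marks the range as
   defined, per the assumption above. */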
8159 if (insert)
8160 MC_(make_mem_noaccess) (addr, len);
8161 else
8162 MC_(make_mem_defined) (addr, len);
8163 return True;
8164 }
8165
8166 static void mc_pre_clo_init(void)
8167 {
8168 VG_(details_name) ("Memcheck");
8169 VG_(details_version) (NULL);
8170 VG_(details_description) ("a memory error detector");
8171 VG_(details_copyright_author)(
8172 "Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.");
8173 VG_(details_bug_reports_to) (VG_BUGS_TO);
8174 VG_(details_avg_translation_sizeB) ( 640 );
8176 VG_(basic_tool_funcs) (mc_post_clo_init,
8177 MC_(instrument),
8178 mc_fini);
8180 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) );
8183 VG_(needs_core_errors) ();
8184 VG_(needs_tool_errors) (MC_(eq_Error),
8185 MC_(before_pp_Error),
8186 MC_(pp_Error),
8187 True,/*show TIDs for errors*/
8188 MC_(update_Error_extra),
8189 MC_(is_recognised_suppression),
8190 MC_(read_extra_suppression_info),
8191 MC_(error_matches_suppression),
8192 MC_(get_error_name),
8193 MC_(get_extra_suppression_info),
8194 MC_(print_extra_suppression_use),
8195 MC_(update_extra_suppression_use));
8196 VG_(needs_libc_freeres) ();
8197 VG_(needs_cxx_freeres) ();
8198 VG_(needs_command_line_options)(mc_process_cmd_line_options,
8199 mc_print_usage,
8200 mc_print_debug_usage);
8201 VG_(needs_client_requests) (mc_handle_client_request);
8202 VG_(needs_sanity_checks) (mc_cheap_sanity_check,
8203 mc_expensive_sanity_check);
8204 VG_(needs_print_stats) (mc_print_stats);
8205 VG_(needs_info_location) (MC_(pp_describe_addr));
8206 VG_(needs_malloc_replacement) (MC_(malloc),
8207 MC_(__builtin_new),
8208 MC_(__builtin_vec_new),
8209 MC_(memalign),
8210 MC_(calloc),
8211 MC_(free),
8212 MC_(__builtin_delete),
8213 MC_(__builtin_vec_delete),
8214 MC_(realloc),
8215 MC_(malloc_usable_size),
8216 MC_MALLOC_DEFAULT_REDZONE_SZB );
8217 MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
8219 VG_(needs_xml_output) ();
8221 VG_(track_new_mem_startup) ( mc_new_mem_startup );
8223 // Handling of mmap and mprotect isn't simple (well, it is simple,
8224 // but the justification isn't). See comments above, just prior to
8225 // mc_new_mem_mmap.
8226 VG_(track_new_mem_mmap) ( mc_new_mem_mmap );
8227 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
8229 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) );
8231 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
8232 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) );
8233 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) );
8235 /* Defer the specification of the new_mem_stack functions to the
8236 post_clo_init function, since we need to first parse the command
8237 line before deciding which set to use. */
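/* (Concretely: mc_post_clo_init is expected to register the *_w_ECU
   variants of the new_mem_stack handlers when origin tracking is
   enabled, i.e. MC_(clo_mc_level) == 3 / --track-origins=yes, and the
   plain variants otherwise.) */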
8239 # ifdef PERF_FAST_STACK
8240 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
8241 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 );
8242 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 );
8243 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 );
8244 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 );
8245 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 );
8246 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 );
8247 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 );
8248 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 );
8249 # endif
8250 VG_(track_die_mem_stack) ( mc_die_mem_stack );
8252 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) );
8254 VG_(track_pre_mem_read) ( check_mem_is_defined );
8255 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
8256 VG_(track_pre_mem_write) ( check_mem_is_addressable );
8257 VG_(track_post_mem_write) ( mc_post_mem_write );
8259 VG_(track_post_reg_write) ( mc_post_reg_write );
8260 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
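/* At MC_(clo_mc_level) == 1 only addressability (A bits) is tracked,
   so there are no definedness (V) bits to shuttle between registers
   and memory; hence the two hooks below are only needed at level 2
   and above. */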
8262 if (MC_(clo_mc_level) >= 2) {
8263 VG_(track_copy_mem_to_reg) ( mc_copy_mem_to_reg );
8264 VG_(track_copy_reg_to_mem) ( mc_copy_reg_to_mem );
8265 }
8266
8267 VG_(needs_watchpoint) ( mc_mark_unaddressable_for_watchpoint );
8269 init_shadow_memory();
8270 // MC_(chunk_poolalloc) must be allocated in post_clo_init
8271 tl_assert(MC_(chunk_poolalloc) == NULL);
8272 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" );
8273 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
8274 init_prof_mem();
8276 tl_assert( mc_expensive_sanity_check() );
8278 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
8279 tl_assert(sizeof(UWord) == sizeof(Addr));
8280 // Call me paranoid. I don't care.
8281 tl_assert(sizeof(void*) == sizeof(Addr));
8283 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
8284 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
8286 /* This is small. Always initialise it. */
8287 init_nia_to_ecu_cache();
8289 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
8290 if we need to, since the command line args haven't been
8291 processed yet. Hence defer it to mc_post_clo_init. */
8292 tl_assert(ocacheL1 == NULL);
8293 tl_assert(ocacheL2 == NULL);
8295 /* Check some important stuff. See extensive comments above
8296 re UNALIGNED_OR_HIGH for background. */
8297 # if VG_WORDSIZE == 4
8298 tl_assert(sizeof(void*) == 4);
8299 tl_assert(sizeof(Addr) == 4);
8300 tl_assert(sizeof(UWord) == 4);
8301 tl_assert(sizeof(Word) == 4);
8302 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
8303 tl_assert(MASK(1) == 0UL);
8304 tl_assert(MASK(2) == 1UL);
8305 tl_assert(MASK(4) == 3UL);
8306 tl_assert(MASK(8) == 7UL);
8307 # else
8308 tl_assert(VG_WORDSIZE == 8);
8309 tl_assert(sizeof(void*) == 8);
8310 tl_assert(sizeof(Addr) == 8);
8311 tl_assert(sizeof(UWord) == 8);
8312 tl_assert(sizeof(Word) == 8);
8313 tl_assert(MAX_PRIMARY_ADDRESS == 0x1FFFFFFFFFULL);
8314 tl_assert(MASK(1) == 0xFFFFFFE000000000ULL);
8315 tl_assert(MASK(2) == 0xFFFFFFE000000001ULL);
8316 tl_assert(MASK(4) == 0xFFFFFFE000000003ULL);
8317 tl_assert(MASK(8) == 0xFFFFFFE000000007ULL);
8318 # endif
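/* A worked example of what these values buy on 64-bit targets:
   MASK(4) == 0xFFFFFFE000000003, the bits that must be zero for an
   aligned 4-byte access within the primary map. For an aligned address
   no higher than MAX_PRIMARY_ADDRESS (0x1FFFFFFFFF), (addr & MASK(4))
   is 0 and the fast LOADV/STOREV path applies; a misaligned or too-high
   address gives a nonzero result and falls back to the slow path. */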
8320 /* Check some assertions to do with the instrumentation machinery. */
8321 MC_(do_instrumentation_startup_checks)();
8322 }
8323
8324 STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));
8326 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
8328 /*--------------------------------------------------------------------*/
8329 /*--- end mc_main.c ---*/
8330 /*--------------------------------------------------------------------*/