1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
5 /*--- accessibility (A) and validity (V) status of each byte. ---*/
6 /*--- mc_main.c ---*/
7 /*--------------------------------------------------------------------*/
9 /*
10 This file is part of MemCheck, a heavyweight Valgrind tool for
11 detecting memory errors.
13 Copyright (C) 2000-2017 Julian Seward
14 jseward@acm.org
16 This program is free software; you can redistribute it and/or
17 modify it under the terms of the GNU General Public License as
18 published by the Free Software Foundation; either version 2 of the
19 License, or (at your option) any later version.
21 This program is distributed in the hope that it will be useful, but
22 WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with this program; if not, write to the Free Software
28 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 02111-1307, USA.
31 The GNU General Public License is contained in the file COPYING.
34 #include "pub_tool_basics.h"
35 #include "pub_tool_aspacemgr.h"
36 #include "pub_tool_gdbserver.h"
37 #include "pub_tool_poolalloc.h"
38 #include "pub_tool_hashtable.h" // For mc_include.h
39 #include "pub_tool_libcbase.h"
40 #include "pub_tool_libcassert.h"
41 #include "pub_tool_libcprint.h"
42 #include "pub_tool_machine.h"
43 #include "pub_tool_mallocfree.h"
44 #include "pub_tool_options.h"
45 #include "pub_tool_oset.h"
46 #include "pub_tool_rangemap.h"
47 #include "pub_tool_replacemalloc.h"
48 #include "pub_tool_tooliface.h"
49 #include "pub_tool_threadstate.h"
50 #include "pub_tool_xarray.h"
51 #include "pub_tool_xtree.h"
52 #include "pub_tool_xtmemory.h"
54 #include "mc_include.h"
55 #include "memcheck.h" /* for client requests */
58 /* Set to 1 to enable handwritten assembly helpers on targets for
59 which it is supported. */
60 #define ENABLE_ASSEMBLY_HELPERS 1
62 /* Set to 1 to do a little more sanity checking */
63 #define VG_DEBUG_MEMORY 0
65 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
67 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
68 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
71 /*------------------------------------------------------------*/
72 /*--- Fast-case knobs ---*/
73 /*------------------------------------------------------------*/
75 // Comment these out to disable the fast cases (don't just set them to zero).
77 #define PERF_FAST_LOADV 1
78 #define PERF_FAST_STOREV 1
80 #define PERF_FAST_SARP 1
82 #define PERF_FAST_STACK 1
83 #define PERF_FAST_STACK2 1
85 /* Change this to 1 to enable assertions on origin tracking cache fast
86 paths */
87 #define OC_ENABLE_ASSERTIONS 0
90 /*------------------------------------------------------------*/
91 /*--- Comments on the origin tracking implementation ---*/
92 /*------------------------------------------------------------*/
94 /* See detailed comment entitled
95 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
96 which is contained further on in this file. */
99 /*------------------------------------------------------------*/
100 /*--- V bits and A bits ---*/
101 /*------------------------------------------------------------*/
103 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
104 thinks the corresponding value bit is defined. And every memory byte
105 has an A bit, which tracks whether Memcheck thinks the program can access
106 it safely (ie. it's mapped, and has at least one of the RWX permission bits
107 set). So every N-bit register is shadowed with N V bits, and every memory
108 byte is shadowed with 8 V bits and one A bit.
110 In the implementation, we use two forms of compression (compressed V bits
111 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
112 for memory.
114 Memcheck also tracks extra information about each heap block that is
115 allocated, for detecting memory leaks and other purposes.
118 /*------------------------------------------------------------*/
119 /*--- Basic A/V bitmap representation. ---*/
120 /*------------------------------------------------------------*/
122 /* All reads and writes are checked against a memory map (a.k.a. shadow
123 memory), which records the state of all memory in the process.
125 On 32-bit machines the memory map is organised as follows.
126 The top 16 bits of an address are used to index into a top-level
127 map table, containing 65536 entries. Each entry is a pointer to a
128 second-level map, which records the accessibility and validity
129 permissions for the 65536 bytes indexed by the lower 16 bits of the
130 address. Each byte is represented by two bits (details are below). So
131 each second-level map contains 16384 bytes. This two-level arrangement
132 conveniently divides the 4G address space into 64k lumps, each size 64k
133 bytes.
135 All entries in the primary (top-level) map must point to a valid
136 secondary (second-level) map. Since many of the 64kB chunks will
137 have the same status for every bit -- ie. noaccess (for unused
138 address space) or entirely addressable and defined (for code segments) --
139 there are three distinguished secondary maps, which indicate 'noaccess',
140 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
141 map entry points to the relevant distinguished map. In practice,
142 typically more than half of the addressable memory is represented with
143 the 'undefined' or 'defined' distinguished secondary map, so it gives a
144 good saving. It also lets us set the V+A bits of large address regions
145 quickly in set_address_range_perms().
147 On 64-bit machines it's more complicated. If we followed the same basic
148 scheme we'd have a four-level table which would require too many memory
149 accesses. So instead the top-level map table has 2^21 entries (indexed
150 using bits 16..36 of the address); this covers the bottom 128GB. Any
151 accesses above 128GB are handled with a slow, sparse auxiliary table.
152 Valgrind's address space manager tries very hard to keep things below
153 this 128GB barrier so that performance doesn't suffer too much.
155 Note that this file has a lot of different functions for reading and
156 writing shadow memory. Only a couple are strictly necessary (eg.
157 get_vabits2 and set_vabits2); most are just specialised for specific
158 common cases to improve performance.
160 Aside: the V+A bits are less precise than they could be -- we have no way
161 of marking memory as read-only. It would be great if we could add an
162 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
163 which requires 2.3 bits to hold, and there's no way to do that elegantly
164 -- we'd have to double up to 4 bits of metadata per byte, which doesn't
165 seem worth it.
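/* An illustrative sketch (not part of the tool's code) of the address
   decomposition described above, for an address below
   MAX_PRIMARY_ADDRESS: bits 16 and upwards select the primary map
   entry, bits 2..15 select the vabits8 byte within the secondary map,
   and bits 0..1 select the 2-bit field within that byte. */
#if 0
static void shadow_lookup_sketch ( Addr a )
{
   UWord pm_index = a >> 16;            /* which secondary map            */
   UWord sm_byte  = (a & 0xffff) >> 2;  /* which vabits8 byte inside it   */
   UWord shift    = (a & 3) << 1;       /* which 2-bit field in that byte */
   /* e.g. a == 0x1234ABCD: pm_index == 0x1234, sm_byte == 0x2AF3,
      shift == 2 */
   (void)pm_index; (void)sm_byte; (void)shift;
}
#endif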
168 /* --------------- Basic configuration --------------- */
170 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. */
172 #if VG_WORDSIZE == 4
174 /* cover the entire address space */
175 # define N_PRIMARY_BITS 16
177 #else
179 /* Just handle the first 128G fast and the rest via auxiliary
180 primaries. If you change this, Memcheck will assert at startup.
181 See the definition of UNALIGNED_OR_HIGH for extensive comments. */
182 # define N_PRIMARY_BITS 21
184 #endif
187 /* Do not change this. */
188 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS)
190 /* Do not change this. */
191 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
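/* Worked out, for reference: on 64-bit targets N_PRIMARY_BITS is 21, so
   N_PRIMARY_MAP == 2^21 and MAX_PRIMARY_ADDRESS == 2^21 * 2^16 - 1
   == 2^37 - 1, i.e. the primary map directly covers the first 128GB of
   the address space; anything above that goes via the auxiliary map. */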
194 /* --------------- Secondary maps --------------- */
196 // Each byte of memory conceptually has an A bit, which indicates its
197 // addressability, and 8 V bits, which indicate its definedness.
199 // But because very few bytes are partially defined, we can use a nice
200 // compression scheme to reduce the size of shadow memory. Each byte of
201 // memory has 2 bits which indicate its state (ie. V+A bits):
203 // 00: noaccess (unaddressable but treated as fully defined)
204 // 01: undefined (addressable and fully undefined)
205 // 10: defined (addressable and fully defined)
206 // 11: partdefined (addressable and partially defined)
208 // In the "partdefined" case, we use a secondary table to store the V bits.
209 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
210 // bits.
212 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
213 // four bytes (32 bits) of memory are in each chunk. Hence the name
214 // "vabits8". This lets us get the V+A bits for four bytes at a time
215 // easily (without having to do any shifting and/or masking), and that is a
216 // very common operation. (Note that although each vabits8 chunk
217 // is 8 bits in size, it represents 32 bits of memory.)
219 // The representation is "inverse" little-endian... each 4 bytes of
220 // memory is represented by a 1 byte value, where:
222 // - the status of byte (a+0) is held in bits [1..0]
223 // - the status of byte (a+1) is held in bits [3..2]
224 // - the status of byte (a+2) is held in bits [5..4]
225 // - the status of byte (a+3) is held in bits [7..6]
227 // It's "inverse" because endianness normally describes a mapping from
228 // value bits to memory addresses; in this case the mapping is inverted.
229 // Ie. instead of particular value bits being held in certain addresses, in
230 // this case certain addresses are represented by particular value bits.
231 // See insert_vabits2_into_vabits8() for an example.
233 // But note that we don't compress the V bits stored in registers; they
234 // need to be explicit to make the shadow operations possible. Therefore
235 // when moving values between registers and memory we need to convert
236 // between the expanded in-register format and the compressed in-memory
237 // format. This isn't so difficult, it just requires careful attention in a
238 // few places.
240 // These represent eight bits of memory.
241 #define VA_BITS2_NOACCESS 0x0 // 00b
242 #define VA_BITS2_UNDEFINED 0x1 // 01b
243 #define VA_BITS2_DEFINED 0x2 // 10b
244 #define VA_BITS2_PARTDEFINED 0x3 // 11b
246 // These represent 16 bits of memory.
247 #define VA_BITS4_NOACCESS 0x0 // 00_00b
248 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
249 #define VA_BITS4_DEFINED 0xa // 10_10b
251 // These represent 32 bits of memory.
252 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
253 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
254 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
256 // These represent 64 bits of memory.
257 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
258 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
259 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
261 // These represent 128 bits of memory.
262 #define VA_BITS32_UNDEFINED 0x55555555 // 01_01_01_01b x 4
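// Illustrative sketch (not part of the original code): how four bytes'
// 2-bit states pack into one vabits8 value, using the constants above.
// Four "defined" bytes give 10_10_10_10b, which is VA_BITS8_DEFINED.
#if 0
static UChar pack_vabits8_sketch ( const UChar vabits2_for_byte[4] )
{
   UChar v = 0;
   Int   i;
   for (i = 0; i < 4; i++) {
      /* the state of byte (a+i) lives in bits [2i+1 .. 2i] */
      v |= (UChar)((vabits2_for_byte[i] & 0x3) << (2 * i));
   }
   return v;
}
#endif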
265 #define SM_CHUNKS 16384 // Each SM covers 64k of memory.
266 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
267 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3)
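// Worked example (illustrative): SM_OFF(aaa) picks the vabits8 byte
// covering the 4-byte group containing aaa, and SM_OFF_16(aaa) picks the
// vabits16 halfword covering the 8-byte group, so SM_OFF_16(aaa) ==
// SM_OFF(aaa) / 2. E.g. for (aaa & 0xffff) == 0xABC8, SM_OFF(aaa) ==
// 0x2AF2 and SM_OFF_16(aaa) == 0x1579.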
269 // Paranoia: it's critical for performance that the requested inlining
270 // occurs. So try extra hard.
271 #define INLINE inline __attribute__((always_inline))
273 static INLINE Addr start_of_this_sm ( Addr a ) {
274 return (a & (~SM_MASK));
276 static INLINE Bool is_start_of_sm ( Addr a ) {
277 return (start_of_this_sm(a) == a);
280 STATIC_ASSERT(SM_CHUNKS % 2 == 0);
282 typedef
283 union {
284 UChar vabits8[SM_CHUNKS];
285 UShort vabits16[SM_CHUNKS/2];
287 SecMap;
289 // 3 distinguished secondary maps, one for no-access, one for
290 // accessible but undefined, and one for accessible and defined.
291 // Distinguished secondaries may never be modified.
292 #define SM_DIST_NOACCESS 0
293 #define SM_DIST_UNDEFINED 1
294 #define SM_DIST_DEFINED 2
296 static SecMap sm_distinguished[3];
298 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
299 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
302 // Forward declaration
303 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
305 /* dist_sm points to one of our three distinguished secondaries. Make
306 a copy of it so that we can write to it.
308 static SecMap* copy_for_writing ( SecMap* dist_sm )
310 SecMap* new_sm;
311 tl_assert(dist_sm == &sm_distinguished[0]
312 || dist_sm == &sm_distinguished[1]
313 || dist_sm == &sm_distinguished[2]);
315 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
316 if (new_sm == NULL)
317 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
318 sizeof(SecMap) );
319 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
320 update_SM_counts(dist_sm, new_sm);
321 return new_sm;
324 /* --------------- Stats --------------- */
326 static Int n_issued_SMs = 0;
327 static Int n_deissued_SMs = 0;
328 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs
329 static Int n_undefined_SMs = 0;
330 static Int n_defined_SMs = 0;
331 static Int n_non_DSM_SMs = 0;
332 static Int max_noaccess_SMs = 0;
333 static Int max_undefined_SMs = 0;
334 static Int max_defined_SMs = 0;
335 static Int max_non_DSM_SMs = 0;
337 /* # searches initiated in auxmap_L1, and # base cmps required */
338 static ULong n_auxmap_L1_searches = 0;
339 static ULong n_auxmap_L1_cmps = 0;
340 /* # of searches that missed in auxmap_L1 and therefore had to
341 be handed to auxmap_L2. And the number of nodes inserted. */
342 static ULong n_auxmap_L2_searches = 0;
343 static ULong n_auxmap_L2_nodes = 0;
345 static Int n_sanity_cheap = 0;
346 static Int n_sanity_expensive = 0;
348 static Int n_secVBit_nodes = 0;
349 static Int max_secVBit_nodes = 0;
351 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
353 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
354 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
355 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --;
356 else { n_non_DSM_SMs --;
357 n_deissued_SMs ++; }
359 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
360 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
361 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++;
362 else { n_non_DSM_SMs ++;
363 n_issued_SMs ++; }
365 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs;
366 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
367 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs;
368 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs;
371 /* --------------- Primary maps --------------- */
373 /* The main primary map. This covers some initial part of the address
374 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
375 handled using the auxiliary primary map.
377 static SecMap* primary_map[N_PRIMARY_MAP];
380 /* An entry in the auxiliary primary map. base must be a 64k-aligned
381 value, and sm points at the relevant secondary map. As with the
382 main primary map, the secondary may be either a real secondary, or
383 one of the three distinguished secondaries. DO NOT CHANGE THIS
384 LAYOUT: the first word has to be the key for OSet fast lookups.
386 typedef
387 struct {
388 Addr base;
389 SecMap* sm;
391 AuxMapEnt;
393 /* Tunable parameter: How big is the L1 queue? */
394 #define N_AUXMAP_L1 24
396 /* Tunable parameter: How far along the L1 queue to insert
397 entries resulting from L2 lookups? */
398 #define AUXMAP_L1_INSERT_IX 12
400 static struct {
401 Addr base;
402 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
404 auxmap_L1[N_AUXMAP_L1];
406 static OSet* auxmap_L2 = NULL;
408 static void init_auxmap_L1_L2 ( void )
410 Int i;
411 for (i = 0; i < N_AUXMAP_L1; i++) {
412 auxmap_L1[i].base = 0;
413 auxmap_L1[i].ent = NULL;
416 tl_assert(0 == offsetof(AuxMapEnt,base));
417 tl_assert(sizeof(Addr) == sizeof(void*));
418 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
419 /*fastCmp*/ NULL,
420 VG_(malloc), "mc.iaLL.1", VG_(free) );
423 /* Check representation invariants; if OK return NULL; else a
424 descriptive bit of text. Also return the number of
425 non-distinguished secondary maps referred to from the auxiliary
426 primary maps. */
428 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
430 Word i, j;
431 /* On a 32-bit platform, the L2 and L1 tables should
432 both remain empty forever.
434 On a 64-bit platform:
435 In the L2 table:
436 all .base & 0xFFFF == 0
437 all .base > MAX_PRIMARY_ADDRESS
438 In the L1 table:
439 all .base & 0xFFFF == 0
440 all (.base > MAX_PRIMARY_ADDRESS
441 .base & 0xFFFF == 0
442 and .ent points to an AuxMapEnt with the same .base)
444 (.base == 0 and .ent == NULL)
446 *n_secmaps_found = 0;
447 if (sizeof(void*) == 4) {
448 /* 32-bit platform */
449 if (VG_(OSetGen_Size)(auxmap_L2) != 0)
450 return "32-bit: auxmap_L2 is non-empty";
451 for (i = 0; i < N_AUXMAP_L1; i++)
452 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
453 return "32-bit: auxmap_L1 is non-empty";
454 } else {
455 /* 64-bit platform */
456 UWord elems_seen = 0;
457 AuxMapEnt *elem, *res;
458 AuxMapEnt key;
459 /* L2 table */
460 VG_(OSetGen_ResetIter)(auxmap_L2);
461 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
462 elems_seen++;
463 if (0 != (elem->base & (Addr)0xFFFF))
464 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
465 if (elem->base <= MAX_PRIMARY_ADDRESS)
466 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
467 if (elem->sm == NULL)
468 return "64-bit: .sm in _L2 is NULL";
469 if (!is_distinguished_sm(elem->sm))
470 (*n_secmaps_found)++;
472 if (elems_seen != n_auxmap_L2_nodes)
473 return "64-bit: disagreement on number of elems in _L2";
474 /* Check L1-L2 correspondence */
475 for (i = 0; i < N_AUXMAP_L1; i++) {
476 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
477 continue;
478 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
479 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
480 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
481 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
482 if (auxmap_L1[i].ent == NULL)
483 return "64-bit: .ent is NULL in auxmap_L1";
484 if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
485 return "64-bit: _L1 and _L2 bases are inconsistent";
486 /* Look it up in auxmap_L2. */
487 key.base = auxmap_L1[i].base;
488 key.sm = 0;
489 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
490 if (res == NULL)
491 return "64-bit: _L1 .base not found in _L2";
492 if (res != auxmap_L1[i].ent)
493 return "64-bit: _L1 .ent disagrees with _L2 entry";
495 /* Check L1 contains no duplicates */
496 for (i = 0; i < N_AUXMAP_L1; i++) {
497 if (auxmap_L1[i].base == 0)
498 continue;
499 for (j = i+1; j < N_AUXMAP_L1; j++) {
500 if (auxmap_L1[j].base == 0)
501 continue;
502 if (auxmap_L1[j].base == auxmap_L1[i].base)
503 return "64-bit: duplicate _L1 .base entries";
507 return NULL; /* ok */
510 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
512 Word i;
513 tl_assert(ent);
514 tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
515 for (i = N_AUXMAP_L1-1; i > rank; i--)
516 auxmap_L1[i] = auxmap_L1[i-1];
517 auxmap_L1[rank].base = ent->base;
518 auxmap_L1[rank].ent = ent;
521 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
523 AuxMapEnt key;
524 AuxMapEnt* res;
525 Word i;
527 tl_assert(a > MAX_PRIMARY_ADDRESS);
528 a &= ~(Addr)0xFFFF;
530 /* First search the front-cache, which is a self-organising
531 list containing the most popular entries. */
533 if (LIKELY(auxmap_L1[0].base == a))
534 return auxmap_L1[0].ent;
535 if (LIKELY(auxmap_L1[1].base == a)) {
536 Addr t_base = auxmap_L1[0].base;
537 AuxMapEnt* t_ent = auxmap_L1[0].ent;
538 auxmap_L1[0].base = auxmap_L1[1].base;
539 auxmap_L1[0].ent = auxmap_L1[1].ent;
540 auxmap_L1[1].base = t_base;
541 auxmap_L1[1].ent = t_ent;
542 return auxmap_L1[0].ent;
545 n_auxmap_L1_searches++;
547 for (i = 0; i < N_AUXMAP_L1; i++) {
548 if (auxmap_L1[i].base == a) {
549 break;
552 tl_assert(i >= 0 && i <= N_AUXMAP_L1);
554 n_auxmap_L1_cmps += (ULong)(i+1);
556 if (i < N_AUXMAP_L1) {
557 if (i > 0) {
558 Addr t_base = auxmap_L1[i-1].base;
559 AuxMapEnt* t_ent = auxmap_L1[i-1].ent;
560 auxmap_L1[i-1].base = auxmap_L1[i-0].base;
561 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent;
562 auxmap_L1[i-0].base = t_base;
563 auxmap_L1[i-0].ent = t_ent;
564 i--;
566 return auxmap_L1[i].ent;
569 n_auxmap_L2_searches++;
571 /* First see if we already have it. */
572 key.base = a;
573 key.sm = 0;
575 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
576 if (res)
577 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
578 return res;
581 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
583 AuxMapEnt *nyu, *res;
585 /* First see if we already have it. */
586 res = maybe_find_in_auxmap( a );
587 if (LIKELY(res))
588 return res;
590 /* Ok, there's no entry in the secondary map, so we'll have
591 to allocate one. */
592 a &= ~(Addr)0xFFFF;
594 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
595 nyu->base = a;
596 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS];
597 VG_(OSetGen_Insert)( auxmap_L2, nyu );
598 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
599 n_auxmap_L2_nodes++;
600 return nyu;
603 /* --------------- SecMap fundamentals --------------- */
605 // In all these, 'low' means it's definitely in the main primary map,
606 // 'high' means it's definitely in the auxiliary table.
608 static INLINE UWord get_primary_map_low_offset ( Addr a )
610 UWord pm_off = a >> 16;
611 return pm_off;
614 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
616 UWord pm_off = a >> 16;
617 # if VG_DEBUG_MEMORY >= 1
618 tl_assert(pm_off < N_PRIMARY_MAP);
619 # endif
620 return &primary_map[ pm_off ];
623 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
625 AuxMapEnt* am = find_or_alloc_in_auxmap(a);
626 return &am->sm;
629 static INLINE SecMap** get_secmap_ptr ( Addr a )
631 return ( a <= MAX_PRIMARY_ADDRESS
632 ? get_secmap_low_ptr(a)
633 : get_secmap_high_ptr(a));
636 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
638 return *get_secmap_low_ptr(a);
641 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
643 return *get_secmap_high_ptr(a);
646 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
648 SecMap** p = get_secmap_low_ptr(a);
649 if (UNLIKELY(is_distinguished_sm(*p)))
650 *p = copy_for_writing(*p);
651 return *p;
654 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
656 SecMap** p = get_secmap_high_ptr(a);
657 if (UNLIKELY(is_distinguished_sm(*p)))
658 *p = copy_for_writing(*p);
659 return *p;
662 /* Produce the secmap for 'a', either from the primary map or by
663 ensuring there is an entry for it in the aux primary map. The
664 secmap may be a distinguished one as the caller will only want to
665 be able to read it.
667 static INLINE SecMap* get_secmap_for_reading ( Addr a )
669 return ( a <= MAX_PRIMARY_ADDRESS
670 ? get_secmap_for_reading_low (a)
671 : get_secmap_for_reading_high(a) );
674 /* Produce the secmap for 'a', either from the primary map or by
675 ensuring there is an entry for it in the aux primary map. The
676 secmap may not be a distinguished one, since the caller will want
677 to be able to write it. If it is a distinguished secondary, make a
678 writable copy of it, install it, and return the copy instead. (COW
679 semantics).
681 static INLINE SecMap* get_secmap_for_writing ( Addr a )
683 return ( a <= MAX_PRIMARY_ADDRESS
684 ? get_secmap_for_writing_low (a)
685 : get_secmap_for_writing_high(a) );
688 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
689 allocate one if one doesn't already exist. This is used by the
690 leak checker.
692 static SecMap* maybe_get_secmap_for ( Addr a )
694 if (a <= MAX_PRIMARY_ADDRESS) {
695 return get_secmap_for_reading_low(a);
696 } else {
697 AuxMapEnt* am = maybe_find_in_auxmap(a);
698 return am ? am->sm : NULL;
702 /* --------------- Fundamental functions --------------- */
704 static INLINE
705 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
707 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
708 *vabits8 &= ~(0x3 << shift); // mask out the two old bits
709 *vabits8 |= (vabits2 << shift); // mask in the two new bits
712 static INLINE
713 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
715 UInt shift;
716 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
717 shift = (a & 2) << 1; // shift by 0 or 4
718 *vabits8 &= ~(0xf << shift); // mask out the four old bits
719 *vabits8 |= (vabits4 << shift); // mask in the four new bits
722 static INLINE
723 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
725 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
726 vabits8 >>= shift; // shift the two bits to the bottom
727 return 0x3 & vabits8; // mask out the rest
730 static INLINE
731 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
733 UInt shift;
734 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
735 shift = (a & 2) << 1; // shift by 0 or 4
736 vabits8 >>= shift; // shift the four bits to the bottom
737 return 0xf & vabits8; // mask out the rest
740 // Note that these four are only used in slow cases. The fast cases do
741 // clever things like combine the auxmap check (in
742 // get_secmap_{read,writ}able) with alignment checks.
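// A small round-trip sketch (illustrative only), using the insert/extract
// helpers above: mark byte (a+2) of an otherwise-undefined vabits8 as
// defined and read its state back.
#if 0
static void vabits8_round_trip_sketch ( void )
{
   Addr  a = 0x1002;               /* a & 3 == 2, so the field shift is 4 */
   UChar w = VA_BITS8_UNDEFINED;   /* 01_01_01_01b == 0x55                */
   insert_vabits2_into_vabits8(a, VA_BITS2_DEFINED, &w);
   /* w is now 0x65 == 01_10_01_01b: byte (a+2) defined, the rest not */
   tl_assert(VA_BITS2_DEFINED == extract_vabits2_from_vabits8(a, w));
}
#endif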
744 // *** WARNING! ***
745 // Any time this function is called, if it is possible that vabits2
746 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
747 // sec-V-bits table must also be set!
748 static INLINE
749 void set_vabits2 ( Addr a, UChar vabits2 )
751 SecMap* sm = get_secmap_for_writing(a);
752 UWord sm_off = SM_OFF(a);
753 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
756 static INLINE
757 UChar get_vabits2 ( Addr a )
759 SecMap* sm = get_secmap_for_reading(a);
760 UWord sm_off = SM_OFF(a);
761 UChar vabits8 = sm->vabits8[sm_off];
762 return extract_vabits2_from_vabits8(a, vabits8);
765 // *** WARNING! ***
766 // Any time this function is called, if it is possible that any of the
767 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
768 // corresponding entry(s) in the sec-V-bits table must also be set!
769 static INLINE
770 UChar get_vabits8_for_aligned_word32 ( Addr a )
772 SecMap* sm = get_secmap_for_reading(a);
773 UWord sm_off = SM_OFF(a);
774 UChar vabits8 = sm->vabits8[sm_off];
775 return vabits8;
778 static INLINE
779 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
781 SecMap* sm = get_secmap_for_writing(a);
782 UWord sm_off = SM_OFF(a);
783 sm->vabits8[sm_off] = vabits8;
787 // Forward declarations
788 static UWord get_sec_vbits8(Addr a);
789 static void set_sec_vbits8(Addr a, UWord vbits8);
791 // Returns False if there was an addressability error.
792 static INLINE
793 Bool set_vbits8 ( Addr a, UChar vbits8 )
795 Bool ok = True;
796 UChar vabits2 = get_vabits2(a);
797 if ( VA_BITS2_NOACCESS != vabits2 ) {
798 // Addressable. Convert in-register format to in-memory format.
799 // Also remove any existing sec V bit entry for the byte if no
800 // longer necessary.
801 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; }
802 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
803 else { vabits2 = VA_BITS2_PARTDEFINED;
804 set_sec_vbits8(a, vbits8); }
805 set_vabits2(a, vabits2);
807 } else {
808 // Unaddressable! Do nothing -- when writing to unaddressable
809 // memory it acts as a black hole, and the V bits can never be seen
810 // again. So we don't have to write them at all.
811 ok = False;
813 return ok;
816 // Returns False if there was an addressability error. In that case, we put
817 // all defined bits into vbits8.
818 static INLINE
819 Bool get_vbits8 ( Addr a, UChar* vbits8 )
821 Bool ok = True;
822 UChar vabits2 = get_vabits2(a);
824 // Convert the in-memory format to in-register format.
825 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; }
826 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
827 else if ( VA_BITS2_NOACCESS == vabits2 ) {
828 *vbits8 = V_BITS8_DEFINED; // Make V bits defined!
829 ok = False;
830 } else {
831 tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
832 *vbits8 = get_sec_vbits8(a);
834 return ok;
838 /* --------------- Secondary V bit table ------------ */
840 // This table holds the full V bit pattern for partially-defined bytes
841 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
842 // memory.
844 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
845 // then overwrite the same address with a fully defined byte, the sec-V-bit
846 // node will not necessarily be removed. This is because checking for
847 // whether removal is necessary would slow down the fast paths.
849 // To avoid the stale nodes building up too much, we periodically (once the
850 // table reaches a certain size) garbage collect (GC) the table by
851 // traversing it and evicting any nodes not having PDB.
852 // If more than a certain proportion of nodes survived, we increase the
853 // table size so that GCs occur less often.
855 // This policy is designed to avoid bad table bloat in the worst case where
856 // a program creates huge numbers of stale PDBs -- we would get this bloat
857 // if we had no GC -- while handling well the case where a node becomes
858 // stale but shortly afterwards is rewritten with a PDB and so becomes
859 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
860 // remove all stale nodes as soon as possible, we just end up re-adding a
861 // lot of them in later again. The "sufficiently stale" approach avoids
862 // this. (If a program has many live PDBs, performance will just suck,
863 // there's no way around that.)
865 // Further comments, JRS 14 Feb 2012. It turns out that the policy of
866 // holding on to stale entries for 2 GCs before discarding them can lead
867 // to massive space leaks. So we're changing to an arrangement where
868 // lines are evicted as soon as they are observed to be stale during a
869 // GC. This also has a side benefit of allowing the sufficiently_stale
870 // field to be removed from the SecVBitNode struct, reducing its size by
871 // 8 bytes, which is a substantial space saving considering that the
872 // struct was previously 32 or so bytes, on a 64 bit target.
874 // In order to try and mitigate the problem that the "sufficiently stale"
875 // heuristic was designed to avoid, the table size is allowed to drift
876 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This
877 // means that nodes will exist in the table longer on average, and hopefully
878 // will be deleted and re-added less frequently.
880 // The previous scaling up mechanism (now called STEPUP) is retained:
881 // if residency exceeds 50%, the table is scaled up, although by a
882 // factor sqrt(2) rather than 2 as before. This effectively doubles the
883 // frequency of GCs when there are many PDBs and reduces the tendency of
884 // stale PDBs to reside for long periods in the table.
886 static OSet* secVBitTable;
888 // Stats
889 static ULong sec_vbits_new_nodes = 0;
890 static ULong sec_vbits_updates = 0;
892 // This must be a power of two; this is checked in mc_pre_clo_init().
893 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
894 // a larger address range) they take more space but we can get multiple
895 // partially-defined bytes in one if they are close to each other, reducing
896 // the number of total nodes. In practice sometimes they are clustered (eg.
897 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
898 // row), but often not. So we choose something intermediate.
899 #define BYTES_PER_SEC_VBIT_NODE 16
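// Worked example (illustrative): with 16 bytes per node, a partially
// defined byte at address 0x5013 is stored in the node whose key is
// VG_ROUNDDN(0x5013, 16) == 0x5010, in slot 0x5013 % 16 == 3 of its
// vbits8[] array -- see get_sec_vbits8() / set_sec_vbits8() below.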
901 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
902 // more than this many nodes survive a GC.
903 #define STEPUP_SURVIVOR_PROPORTION 0.5
904 #define STEPUP_GROWTH_FACTOR 1.414213562
906 // If the above heuristic doesn't apply, then we may make the table
907 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
908 // this many nodes survive a GC, _and_ the total table size does
909 // not exceed a fixed limit. The numbers are somewhat arbitrary, but
910 // work tolerably well on long Firefox runs. The scaleup ratio of 1.5%
911 // gradually but effectively reduces residency and increases the time
912 // between GCs for programs with small numbers of PDBs. The 80000 limit
913 // effectively limits the table size to around 2MB for programs with
914 // small numbers of PDBs, whilst giving a reasonably long lifetime to
915 // entries, to try and reduce the costs resulting from deleting and
916 // re-adding of entries.
917 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
918 #define DRIFTUP_GROWTH_FACTOR 1.015
919 #define DRIFTUP_MAX_SIZE 80000
921 // We GC the table when it gets this many nodes in it, ie. it's effectively
922 // the table size. It can change.
923 static Int secVBitLimit = 1000;
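// Worked example of the resizing policy (illustrative numbers): starting
// from the default secVBitLimit of 1000, a GC runs when the table holds
// 1000 nodes. If more than 1000 * 0.5 == 500 nodes survive, the limit
// steps up to about 1000 * 1.414 == 1414 (STEPUP). Otherwise, if more
// than 1000 * 0.15 == 150 survive and the limit is still below 80000, it
// drifts up to 1000 * 1.015 == 1015 (DRIFTUP).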
925 // The number of GCs done, used to age sec-V-bit nodes for eviction.
926 // Because it's unsigned, wrapping doesn't matter -- the right answer will
927 // come out anyway.
928 static UInt GCs_done = 0;
930 typedef
931 struct {
932 Addr a;
933 UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
935 SecVBitNode;
937 static OSet* createSecVBitTable(void)
939 OSet* newSecVBitTable;
940 newSecVBitTable = VG_(OSetGen_Create_With_Pool)
941 ( offsetof(SecVBitNode, a),
942 NULL, // use fast comparisons
943 VG_(malloc), "mc.cSVT.1 (sec VBit table)",
944 VG_(free),
945 1000,
946 sizeof(SecVBitNode));
947 return newSecVBitTable;
950 static void gcSecVBitTable(void)
952 OSet* secVBitTable2;
953 SecVBitNode* n;
954 Int i, n_nodes = 0, n_survivors = 0;
956 GCs_done++;
958 // Create the new table.
959 secVBitTable2 = createSecVBitTable();
961 // Traverse the table, moving fresh nodes into the new table.
962 VG_(OSetGen_ResetIter)(secVBitTable);
963 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
964 // Keep node if any of its bytes are non-stale. Using
965 // get_vabits2() for the lookup is not very efficient, but I don't
966 // think it matters.
967 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
968 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
969 // Found a non-stale byte, so keep =>
970 // Insert a copy of the node into the new table.
971 SecVBitNode* n2 =
972 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
973 *n2 = *n;
974 VG_(OSetGen_Insert)(secVBitTable2, n2);
975 break;
980 // Get the before and after sizes.
981 n_nodes = VG_(OSetGen_Size)(secVBitTable);
982 n_survivors = VG_(OSetGen_Size)(secVBitTable2);
984 // Destroy the old table, and put the new one in its place.
985 VG_(OSetGen_Destroy)(secVBitTable);
986 secVBitTable = secVBitTable2;
988 if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
989 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
990 n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
993 // Increase table size if necessary.
994 if ((Double)n_survivors
995 > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
996 secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
997 if (VG_(clo_verbosity) > 1)
998 VG_(message)(Vg_DebugMsg,
999 "memcheck GC: %d new table size (stepup)\n",
1000 secVBitLimit);
1002 else
1003 if (secVBitLimit < DRIFTUP_MAX_SIZE
1004 && (Double)n_survivors
1005 > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
1006 secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
1007 if (VG_(clo_verbosity) > 1)
1008 VG_(message)(Vg_DebugMsg,
1009 "memcheck GC: %d new table size (driftup)\n",
1010 secVBitLimit);
1014 static UWord get_sec_vbits8(Addr a)
1016 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1017 Int amod = a % BYTES_PER_SEC_VBIT_NODE;
1018 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1019 UChar vbits8;
1020 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1021 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1022 // make it to the secondary V bits table.
1023 vbits8 = n->vbits8[amod];
1024 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1025 return vbits8;
1028 static void set_sec_vbits8(Addr a, UWord vbits8)
1030 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1031 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE;
1032 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1033 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1034 // make it to the secondary V bits table.
1035 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1036 if (n) {
1037 n->vbits8[amod] = vbits8; // update
1038 sec_vbits_updates++;
1039 } else {
1040 // Do a table GC if necessary. Nb: do this before creating and
1041 // inserting the new node, to avoid erroneously GC'ing the new node.
1042 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1043 gcSecVBitTable();
1046 // New node: assign the specific byte, make the rest invalid (they
1047 // should never be read as-is, but be cautious).
1048 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1049 n->a = aAligned;
1050 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1051 n->vbits8[i] = V_BITS8_UNDEFINED;
1053 n->vbits8[amod] = vbits8;
1055 // Insert the new node.
1056 VG_(OSetGen_Insert)(secVBitTable, n);
1057 sec_vbits_new_nodes++;
1059 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1060 if (n_secVBit_nodes > max_secVBit_nodes)
1061 max_secVBit_nodes = n_secVBit_nodes;
1065 /* --------------- Endianness helpers --------------- */
1067 /* Returns the offset in memory of the byteno-th least significant byte
1068 in a wordszB-sized word, given the specified endianness. */
1069 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1070 UWord byteno ) {
1071 return bigendian ? (wordszB-1-byteno) : byteno;
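/* Examples (illustrative): byteno counts bytes by significance, starting
   from the least significant byte. For an 8-byte word,
   byte_offset_w(8, False, 0) == 0 (little-endian: LSB at the lowest
   address) and byte_offset_w(8, True, 0) == 7 (big-endian: LSB at the
   highest address). */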
1075 /* --------------- Ignored address ranges --------------- */
1077 /* Denotes the address-error-reportability status for address ranges:
1078 IAR_NotIgnored: the usual case -- report errors in this range
1079 IAR_CommandLine: don't report errors -- from command line setting
1080 IAR_ClientReq: don't report errors -- from client request
1082 typedef
1083 enum { IAR_INVALID=99,
1084 IAR_NotIgnored,
1085 IAR_CommandLine,
1086 IAR_ClientReq }
1087 IARKind;
1089 static const HChar* showIARKind ( IARKind iark )
1091 switch (iark) {
1092 case IAR_INVALID: return "INVALID";
1093 case IAR_NotIgnored: return "NotIgnored";
1094 case IAR_CommandLine: return "CommandLine";
1095 case IAR_ClientReq: return "ClientReq";
1096 default: return "???";
1100 // RangeMap<IARKind>
1101 static RangeMap* gIgnoredAddressRanges = NULL;
1103 static void init_gIgnoredAddressRanges ( void )
1105 if (LIKELY(gIgnoredAddressRanges != NULL))
1106 return;
1107 gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
1108 VG_(free), IAR_NotIgnored );
1111 Bool MC_(in_ignored_range) ( Addr a )
1113 if (LIKELY(gIgnoredAddressRanges == NULL))
1114 return False;
1115 UWord how = IAR_INVALID;
1116 UWord key_min = ~(UWord)0;
1117 UWord key_max = (UWord)0;
1118 VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
1119 tl_assert(key_min <= a && a <= key_max);
1120 switch (how) {
1121 case IAR_NotIgnored: return False;
1122 case IAR_CommandLine: return True;
1123 case IAR_ClientReq: return True;
1124 default: break; /* invalid */
1126 VG_(tool_panic)("MC_(in_ignored_range)");
1127 /*NOTREACHED*/
1130 Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB )
1132 if (LIKELY(!MC_(clo_ignore_range_below_sp)))
1133 return False;
1134 tl_assert(szB >= 1 && szB <= 32);
1135 tl_assert(MC_(clo_ignore_range_below_sp__first_offset)
1136 > MC_(clo_ignore_range_below_sp__last_offset));
1137 Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset);
1138 Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset);
1139 if (range_lo >= range_hi) {
1140 /* Bizarre. We have a wraparound situation. What should we do? */
1141 return False; // Play safe
1142 } else {
1143 /* This is the expected case. */
1144 if (range_lo <= a && a + szB - 1 <= range_hi)
1145 return True;
1146 else
1147 return False;
1149 /*NOTREACHED*/
1150 tl_assert(0);
1153 /* Parse two Addrs (in hex) separated by a dash, or fail. */
1155 static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 )
1157 Bool ok = VG_(parse_Addr) (ppc, result1);
1158 if (!ok)
1159 return False;
1160 if (**ppc != '-')
1161 return False;
1162 (*ppc)++;
1163 ok = VG_(parse_Addr) (ppc, result2);
1164 if (!ok)
1165 return False;
1166 return True;
1169 /* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
1170 or fail. */
1172 static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 )
1174 Bool ok = VG_(parse_UInt) (ppc, result1);
1175 if (!ok)
1176 return False;
1177 if (**ppc != '-')
1178 return False;
1179 (*ppc)++;
1180 ok = VG_(parse_UInt) (ppc, result2);
1181 if (!ok)
1182 return False;
1183 return True;
1186 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1187 fail. If they are valid, add them to the global set of ignored
1188 ranges. */
1189 static Bool parse_ignore_ranges ( const HChar* str0 )
1191 init_gIgnoredAddressRanges();
1192 const HChar* str = str0;
1193 const HChar** ppc = &str;
1194 while (1) {
1195 Addr start = ~(Addr)0;
1196 Addr end = (Addr)0;
1197 Bool ok = parse_Addr_pair(ppc, &start, &end);
1198 if (!ok)
1199 return False;
1200 if (start > end)
1201 return False;
1202 VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
1203 if (**ppc == 0)
1204 return True;
1205 if (**ppc != ',')
1206 return False;
1207 (*ppc)++;
1209 /*NOTREACHED*/
1210 return False;
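/* Example of the accepted syntax (illustrative): a string such as
   "0x50000000-0x5FFFFFFF,0xA0000000-0xAFFFFFFF" adds two ignored ranges,
   each pair being start-end addresses in the form accepted by
   VG_(parse_Addr), with pairs separated by commas. */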
1213 /* Add or remove [start, +len) from the set of ignored ranges. */
1214 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
1216 init_gIgnoredAddressRanges();
1217 const Bool verbose = (VG_(clo_verbosity) > 1);
1218 if (len == 0) {
1219 return False;
1221 if (addRange) {
1222 VG_(bindRangeMap)(gIgnoredAddressRanges,
1223 start, start+len-1, IAR_ClientReq);
1224 if (verbose)
1225 VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
1226 (void*)start, (void*)(start+len-1));
1227 } else {
1228 VG_(bindRangeMap)(gIgnoredAddressRanges,
1229 start, start+len-1, IAR_NotIgnored);
1230 if (verbose)
1231 VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
1232 (void*)start, (void*)(start+len-1));
1234 if (verbose) {
1235 VG_(dmsg)("memcheck: now have %u ranges:\n",
1236 VG_(sizeRangeMap)(gIgnoredAddressRanges));
1237 UInt i;
1238 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
1239 UWord val = IAR_INVALID;
1240 UWord key_min = ~(UWord)0;
1241 UWord key_max = (UWord)0;
1242 VG_(indexRangeMap)( &key_min, &key_max, &val,
1243 gIgnoredAddressRanges, i );
1244 VG_(dmsg)("memcheck: [%u] %016lx-%016lx %s\n",
1245 i, key_min, key_max, showIARKind(val));
1248 return True;
1252 /* --------------- Load/store slow cases. --------------- */
1254 static
1255 __attribute__((noinline))
1256 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
1257 Addr a, SizeT nBits, Bool bigendian )
1259 ULong pessim[4]; /* only used when p-l-ok=yes */
1260 SSizeT szB = nBits / 8;
1261 SSizeT szL = szB / 8; /* Size in Longs (64-bit units) */
1262 SSizeT i, j; /* Must be signed. */
1263 SizeT n_addrs_bad = 0;
1264 Addr ai;
1265 UChar vbits8;
1266 Bool ok;
1268 /* Code below assumes load size is a power of two and at least 64
1269 bits. */
1270 tl_assert((szB & (szB-1)) == 0 && szL > 0);
1272 /* If this triggers, you probably just need to increase the size of
1273 the pessim array. */
1274 tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
1276 for (j = 0; j < szL; j++) {
1277 pessim[j] = V_BITS64_DEFINED;
1278 res[j] = V_BITS64_UNDEFINED;
1281 /* Make up a result V word, which contains the loaded data for
1282 valid addresses and Defined for invalid addresses. Iterate over
1283 the bytes in the word, from the most significant down to the
1284 least. The vbits to return are calculated into the res[] array. Also
1285 compute the pessimising value to be used when
1286 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1287 info can be gleaned from the pessim array) but is used as a
1288 cross-check. */
1289 for (j = szL-1; j >= 0; j--) {
1290 ULong vbits64 = V_BITS64_UNDEFINED;
1291 ULong pessim64 = V_BITS64_DEFINED;
1292 UWord long_index = byte_offset_w(szL, bigendian, j);
1293 for (i = 8-1; i >= 0; i--) {
1294 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
1295 ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
1296 ok = get_vbits8(ai, &vbits8);
1297 vbits64 <<= 8;
1298 vbits64 |= vbits8;
1299 if (!ok) n_addrs_bad++;
1300 pessim64 <<= 8;
1301 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1303 res[long_index] = vbits64;
1304 pessim[long_index] = pessim64;
1307 /* In the common case, all the addresses involved are valid, so we
1308 just return the computed V bits and have done. */
1309 if (LIKELY(n_addrs_bad == 0))
1310 return;
1312 /* If there's no possibility of getting a partial-loads-ok
1313 exemption, report the error and quit. */
1314 if (!MC_(clo_partial_loads_ok)) {
1315 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1316 return;
1319 /* The partial-loads-ok exemption might apply. Find out if it
1320 does. If so, don't report an addressing error, but do return
1321 Undefined for the bytes that are out of range, so as to avoid
1322 false negatives. If it doesn't apply, just report an addressing
1323 error in the usual way. */
1325 /* Some code steps along byte strings in aligned chunks
1326 even when there is only a partially defined word at the end (eg,
1327 optimised strlen). This is allowed by the memory model of
1328 modern machines, since an aligned load cannot span two pages and
1329 thus cannot "partially fault".
1331 Therefore, a load from a partially-addressable place is allowed
1332 if all of the following hold:
1333 - the command-line flag is set [by default, it isn't]
1334 - it's an aligned load
1335 - at least one of the addresses in the word *is* valid
1337 Since this suppresses the addressing error, we avoid false
1338 negatives by marking bytes undefined when they come from an
1339 invalid address.
1342 /* "at least one of the addresses is invalid" */
1343 ok = False;
1344 for (j = 0; j < szL; j++)
1345 ok |= pessim[j] != V_BITS64_DEFINED;
1346 tl_assert(ok);
1348 if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
1349 /* Exemption applies. Use the previously computed pessimising
1350 value and return the combined result, but don't flag an
1351 addressing error. The pessimising value is Defined for valid
1352 addresses and Undefined for invalid addresses. */
1353 /* for assumption that doing bitwise or implements UifU */
1354 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1355 /* (really need "UifU" here...)
1356 vbits[j] UifU= pessim[j] (is pessimised by it, iow) */
1357 for (j = szL-1; j >= 0; j--)
1358 res[j] |= pessim[j];
1359 return;
1362 /* Exemption doesn't apply. Flag an addressing error in the normal
1363 way. */
1364 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1368 static
1369 __attribute__((noinline))
1370 __attribute__((used))
1371 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
1372 this function may get called from hand written assembly. */
1373 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1375 PROF_EVENT(MCPE_LOADVN_SLOW);
1377 /* ------------ BEGIN semi-fast cases ------------ */
1378 /* These deal quickly-ish with the common auxiliary primary map
1379 cases on 64-bit platforms. They are merely a speedup hack and can be
1380 omitted without loss of correctness/functionality. Note that in
1381 both cases the "sizeof(void*) == 8" causes these cases to be
1382 folded out by compilers on 32-bit platforms. These are derived
1383 from LOADV64 and LOADV32.
1385 if (LIKELY(sizeof(void*) == 8
1386 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1387 SecMap* sm = get_secmap_for_reading(a);
1388 UWord sm_off16 = SM_OFF_16(a);
1389 UWord vabits16 = sm->vabits16[sm_off16];
1390 if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1391 return V_BITS64_DEFINED;
1392 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1393 return V_BITS64_UNDEFINED;
1394 /* else fall into the slow case */
1396 if (LIKELY(sizeof(void*) == 8
1397 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1398 SecMap* sm = get_secmap_for_reading(a);
1399 UWord sm_off = SM_OFF(a);
1400 UWord vabits8 = sm->vabits8[sm_off];
1401 if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1402 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1403 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1404 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1405 /* else fall into slow case */
1407 /* ------------ END semi-fast cases ------------ */
1409 ULong vbits64 = V_BITS64_UNDEFINED; /* result */
1410 ULong pessim64 = V_BITS64_DEFINED; /* only used when p-l-ok=yes */
1411 SSizeT szB = nBits / 8;
1412 SSizeT i; /* Must be signed. */
1413 SizeT n_addrs_bad = 0;
1414 Addr ai;
1415 UChar vbits8;
1416 Bool ok;
1418 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1420 /* Make up a 64-bit result V word, which contains the loaded data
1421 for valid addresses and Defined for invalid addresses. Iterate
1422 over the bytes in the word, from the most significant down to
1423 the least. The vbits to return are calculated into vbits64.
1424 Also compute the pessimising value to be used when
1425 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1426 info can be gleaned from pessim64) but is used as a
1427 cross-check. */
1428 for (i = szB-1; i >= 0; i--) {
1429 PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
1430 ai = a + byte_offset_w(szB, bigendian, i);
1431 ok = get_vbits8(ai, &vbits8);
1432 vbits64 <<= 8;
1433 vbits64 |= vbits8;
1434 if (!ok) n_addrs_bad++;
1435 pessim64 <<= 8;
1436 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1439 /* In the common case, all the addresses involved are valid, so we
1440 just return the computed V bits and have done. */
1441 if (LIKELY(n_addrs_bad == 0))
1442 return vbits64;
1444 /* If there's no possibility of getting a partial-loads-ok
1445 exemption, report the error and quit. */
1446 if (!MC_(clo_partial_loads_ok)) {
1447 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1448 return vbits64;
1451 /* The partial-loads-ok exemption might apply. Find out if it
1452 does. If so, don't report an addressing error, but do return
1453 Undefined for the bytes that are out of range, so as to avoid
1454 false negatives. If it doesn't apply, just report an addressing
1455 error in the usual way. */
1457 /* Some code steps along byte strings in aligned word-sized chunks
1458 even when there is only a partially defined word at the end (eg,
1459 optimised strlen). This is allowed by the memory model of
1460 modern machines, since an aligned load cannot span two pages and
1461 thus cannot "partially fault", even though such behaviour is
1462 declared undefined by ANSI C/C++.
1464 Therefore, a load from a partially-addressable place is allowed
1465 if all of the following hold:
1466 - the command-line flag is set [by default, it isn't]
1467 - it's a word-sized, word-aligned load
1468 - at least one of the addresses in the word *is* valid
1470 Since this suppresses the addressing error, we avoid false
1471 negatives by marking bytes undefined when they come from an
1472 invalid address.
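   /* Worked example (illustrative, 64-bit target): with
      --partial-loads-ok=yes, an aligned 8-byte load whose first 5 bytes
      are addressable and whose last 3 are not raises no address error;
      the result carries the V bits of the 5 valid bytes, while the 3
      invalid bytes are forced to Undefined via pessim64 below. */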
1475 /* "at least one of the addresses is invalid" */
1476 tl_assert(pessim64 != V_BITS64_DEFINED);
1478 if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1479 && n_addrs_bad < VG_WORDSIZE) {
1480 /* Exemption applies. Use the previously computed pessimising
1481 value for vbits64 and return the combined result, but don't
1482 flag an addressing error. The pessimising value is Defined
1483 for valid addresses and Undefined for invalid addresses. */
1484 /* for assumption that doing bitwise or implements UifU */
1485 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1486 /* (really need "UifU" here...)
1487 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1488 vbits64 |= pessim64;
1489 return vbits64;
1492 /* Also, it appears that gcc generates string-stepping code in
1493 32-bit chunks on 64 bit platforms. So, also grant an exception
1494 for this case. Note that the first clause of the conditional
1495 (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1496 will get folded out in 32 bit builds. */
1497 if (VG_WORDSIZE == 8
1498 && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
1499 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1500 /* (really need "UifU" here...)
1501 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1502 vbits64 |= pessim64;
1503 /* Mark the upper 32 bits as undefined, just to be on the safe
1504 side. */
1505 vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1506 return vbits64;
1509 /* Exemption doesn't apply. Flag an addressing error in the normal
1510 way. */
1511 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1513 return vbits64;
1517 static
1518 __attribute__((noinline))
1519 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1521 SizeT szB = nBits / 8;
1522 SizeT i, n_addrs_bad = 0;
1523 UChar vbits8;
1524 Addr ai;
1525 Bool ok;
1527 PROF_EVENT(MCPE_STOREVN_SLOW);
1529 /* ------------ BEGIN semi-fast cases ------------ */
1530 /* These deal quickly-ish with the common auxiliary primary map
1531 cases on 64-bit platforms. They are merely a speedup hack and can be
1532 omitted without loss of correctness/functionality. Note that in
1533 both cases the "sizeof(void*) == 8" causes these cases to be
1534 folded out by compilers on 32-bit platforms. The logic below
1535 is somewhat similar to some cases extensively commented in
1536 MC_(helperc_STOREV8).
1538 if (LIKELY(sizeof(void*) == 8
1539 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1540 SecMap* sm = get_secmap_for_reading(a);
1541 UWord sm_off16 = SM_OFF_16(a);
1542 UWord vabits16 = sm->vabits16[sm_off16];
1543 if (LIKELY( !is_distinguished_sm(sm) &&
1544 (VA_BITS16_DEFINED == vabits16 ||
1545 VA_BITS16_UNDEFINED == vabits16) )) {
1546 /* Handle common case quickly: a is suitably aligned, */
1547 /* is mapped, and is addressable. */
1548 // Convert full V-bits in register to compact 2-bit form.
1549 if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1550 sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
1551 return;
1552 } else if (V_BITS64_UNDEFINED == vbytes) {
1553 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
1554 return;
1556 /* else fall into the slow case */
1558 /* else fall into the slow case */
1560 if (LIKELY(sizeof(void*) == 8
1561 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1562 SecMap* sm = get_secmap_for_reading(a);
1563 UWord sm_off = SM_OFF(a);
1564 UWord vabits8 = sm->vabits8[sm_off];
1565 if (LIKELY( !is_distinguished_sm(sm) &&
1566 (VA_BITS8_DEFINED == vabits8 ||
1567 VA_BITS8_UNDEFINED == vabits8) )) {
1568 /* Handle common case quickly: a is suitably aligned, */
1569 /* is mapped, and is addressable. */
1570 // Convert full V-bits in register to compact 2-bit form.
1571 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1572 sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1573 return;
1574 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1575 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1576 return;
1578 /* else fall into the slow case */
1580 /* else fall into the slow case */
1582 /* ------------ END semi-fast cases ------------ */
1584 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1586 /* Dump vbytes in memory, iterating from least to most significant
1587 byte. At the same time establish addressability of the location. */
1588 for (i = 0; i < szB; i++) {
1589 PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
1590 ai = a + byte_offset_w(szB, bigendian, i);
1591 vbits8 = vbytes & 0xff;
1592 ok = set_vbits8(ai, vbits8);
1593 if (!ok) n_addrs_bad++;
1594 vbytes >>= 8;
1597 /* If an address error has happened, report it. */
1598 if (n_addrs_bad > 0)
1599 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1603 /*------------------------------------------------------------*/
1604 /*--- Setting permissions over address ranges. ---*/
1605 /*------------------------------------------------------------*/
1607 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1608 UWord dsm_num )
1610 UWord sm_off, sm_off16;
1611 UWord vabits2 = vabits16 & 0x3;
1612 SizeT lenA, lenB, len_to_next_secmap;
1613 Addr aNext;
1614 SecMap* sm;
1615 SecMap** sm_ptr;
1616 SecMap* example_dsm;
1618 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);
1620 /* Check the V+A bits make sense. */
1621 tl_assert(VA_BITS16_NOACCESS == vabits16 ||
1622 VA_BITS16_UNDEFINED == vabits16 ||
1623 VA_BITS16_DEFINED == vabits16);
1625 // This code should never write PDBs; ensure this. (See comment above
1626 // set_vabits2().)
1627 tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1629 if (lenT == 0)
1630 return;
1632 if (lenT > 256 * 1024 * 1024) {
1633 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1634 const HChar* s = "unknown???";
1635 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1636 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1637 if (vabits16 == VA_BITS16_DEFINED ) s = "defined";
1638 VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1639 "large range [0x%lx, 0x%lx) (%s)\n",
1640 a, a + lenT, s);
1644 #ifndef PERF_FAST_SARP
1645 /*------------------ debug-only case ------------------ */
1647 // Endianness doesn't matter here because all bytes are being set to
1648 // the same value.
1649 // Nb: We don't have to worry about updating the sec-V-bits table
1650 // after these set_vabits2() calls because this code never writes
1651 // VA_BITS2_PARTDEFINED values.
1652 SizeT i;
1653 for (i = 0; i < lenT; i++) {
1654 set_vabits2(a + i, vabits2);
1656 return;
1658 #endif
1660 /*------------------ standard handling ------------------ */
1662 /* Get the distinguished secondary that we might want
1663 to use (part of the space-compression scheme). */
1664 example_dsm = &sm_distinguished[dsm_num];
1666 // We have to handle ranges covering various combinations of partial and
1667 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1668 // Cases marked with a '*' are common.
1670 // TYPE PARTS USED
1671 // ---- ----------
1672 // * one partial sec-map (p) 1
1673 // - one whole sec-map (P) 2
1675 // * two partial sec-maps (pp) 1,3
1676 // - one partial, one whole sec-map (pP) 1,2
1677 // - one whole, one partial sec-map (Pp) 2,3
1678 // - two whole sec-maps (PP) 2,2
1680 // * one partial, one whole, one partial (pPp) 1,2,3
1681 // - one partial, two whole (pPP) 1,2,2
1682 // - two whole, one partial (PPp) 2,2,3
1683 // - three whole (PPP) 2,2,2
1685 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1686 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1687 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1688 // - N whole (PP...PP) 2,2...2,2
1690 // Break up total length (lenT) into two parts: length in the first
1691 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
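// [Editor-added illustrative sketch, compiled out.]  A worked example of the
// split, taking SM_SIZE to be 64KB (as the "64KB-aligned, 64KB steps" comment
// in Part 2 below indicates): for a = 0x50001234 and lenT = 0x20000 we get
// aNext = 0x50010000, hence lenA = 0xEDCC and lenB = 0x11234.
#if 0
#include <assert.h>
#include <stdint.h>

#define SM_SIZE_SKETCH 0x10000ull   /* assumed 64KB secondary-map size */

static void split_example ( void )
{
   uint64_t a     = 0x50001234ull;
   uint64_t lenT  = 0x20000ull;
   uint64_t aNext = (a & ~(SM_SIZE_SKETCH - 1)) + SM_SIZE_SKETCH;
   uint64_t lenA  = aNext - a;     /* 0xEDCC  : bytes up to the next sec-map */
   uint64_t lenB  = lenT - lenA;   /* 0x11234 : the rest */
   assert(lenA + lenB == lenT);
   assert(aNext == 0x50010000ull && lenA == 0xEDCCull && lenB == 0x11234ull);
}
#endif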
1692 aNext = start_of_this_sm(a) + SM_SIZE;
1693 len_to_next_secmap = aNext - a;
1694 if ( lenT <= len_to_next_secmap ) {
1695 // Range entirely within one sec-map. Covers almost all cases.
1696 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
1697 lenA = lenT;
1698 lenB = 0;
1699 } else if (is_start_of_sm(a)) {
1700 // Range spans at least one whole sec-map, and starts at the beginning
1701 // of a sec-map; skip to Part 2.
1702 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
1703 lenA = 0;
1704 lenB = lenT;
1705 goto part2;
1706 } else {
1707 // Range spans two or more sec-maps, first one is partial.
1708 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
1709 lenA = len_to_next_secmap;
1710 lenB = lenT - lenA;
1713 //------------------------------------------------------------------------
1714 // Part 1: Deal with the first sec-map. Most of the time the range will be
1715 // entirely within a sec-map and this part alone will suffice. Also,
1716 // doing it this way lets us avoid repeatedly testing for the crossing of
1717 // a sec-map boundary within these loops.
1718 //------------------------------------------------------------------------
1720 // If it's distinguished, make it undistinguished if necessary.
1721 sm_ptr = get_secmap_ptr(a);
1722 if (is_distinguished_sm(*sm_ptr)) {
1723 if (*sm_ptr == example_dsm) {
1724 // Sec-map already has the V+A bits that we want, so skip.
1725 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
1726 a = aNext;
1727 lenA = 0;
1728 } else {
1729 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
1730 *sm_ptr = copy_for_writing(*sm_ptr);
1733 sm = *sm_ptr;
1735 // 1 byte steps
1736 while (True) {
1737 if (VG_IS_8_ALIGNED(a)) break;
1738 if (lenA < 1) break;
1739 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
1740 sm_off = SM_OFF(a);
1741 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1742 a += 1;
1743 lenA -= 1;
1745 // 8-aligned, 8 byte steps
1746 while (True) {
1747 if (lenA < 8) break;
1748 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
1749 sm_off16 = SM_OFF_16(a);
1750 sm->vabits16[sm_off16] = vabits16;
1751 a += 8;
1752 lenA -= 8;
1754 // 1 byte steps
1755 while (True) {
1756 if (lenA < 1) break;
1757 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
1758 sm_off = SM_OFF(a);
1759 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1760 a += 1;
1761 lenA -= 1;
1764 // We've finished the first sec-map. Is that it?
1765 if (lenB == 0)
1766 return;
1768 //------------------------------------------------------------------------
1769 // Part 2: Fast-set entire sec-maps at a time.
1770 //------------------------------------------------------------------------
1771 part2:
1772 // 64KB-aligned, 64KB steps.
1773 // Nb: we can reach here with lenB < SM_SIZE
1774 tl_assert(0 == lenA);
1775 while (True) {
1776 if (lenB < SM_SIZE) break;
1777 tl_assert(is_start_of_sm(a));
1778 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
1779 sm_ptr = get_secmap_ptr(a);
1780 if (!is_distinguished_sm(*sm_ptr)) {
1781 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
1782 // Free the non-distinguished sec-map that we're replacing. This
1783 // case happens moderately often, enough to be worthwhile.
1784 SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1785 tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1787 update_SM_counts(*sm_ptr, example_dsm);
1788 // Make the sec-map entry point to the example DSM
1789 *sm_ptr = example_dsm;
1790 lenB -= SM_SIZE;
1791 a += SM_SIZE;
1794 // We've finished the whole sec-maps. Is that it?
1795 if (lenB == 0)
1796 return;
1798 //------------------------------------------------------------------------
1799 // Part 3: Finish off the final partial sec-map, if necessary.
1800 //------------------------------------------------------------------------
1802 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1804 // If it's distinguished, make it undistinguished if necessary.
1805 sm_ptr = get_secmap_ptr(a);
1806 if (is_distinguished_sm(*sm_ptr)) {
1807 if (*sm_ptr == example_dsm) {
1808 // Sec-map already has the V+A bits that we want, so stop.
1809 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
1810 return;
1811 } else {
1812 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
1813 *sm_ptr = copy_for_writing(*sm_ptr);
1816 sm = *sm_ptr;
1818 // 8-aligned, 8 byte steps
1819 while (True) {
1820 if (lenB < 8) break;
1821 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
1822 sm_off16 = SM_OFF_16(a);
1823 sm->vabits16[sm_off16] = vabits16;
1824 a += 8;
1825 lenB -= 8;
1827 // 1 byte steps
1828 while (True) {
1829 if (lenB < 1) return;
1830 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
1831 sm_off = SM_OFF(a);
1832 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1833 a += 1;
1834 lenB -= 1;
1839 /* --- Set permissions for arbitrary address ranges --- */
1841 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1843 PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
1844 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1845 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1846 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1847 ocache_sarp_Clear_Origins ( a, len );
1850 static void make_mem_undefined ( Addr a, SizeT len )
1852 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
1853 DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1854 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1857 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1859 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
1860 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1861 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1862 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1863 ocache_sarp_Set_Origins ( a, len, otag );
1866 static
1867 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1868 ThreadId tid, UInt okind )
1870 UInt ecu;
1871 ExeContext* here;
1872 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1873 if it is invalid. So no need to do it here. */
1874 tl_assert(okind <= 3);
1875 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1876 tl_assert(here);
1877 ecu = VG_(get_ECU_from_ExeContext)(here);
1878 tl_assert(VG_(is_plausible_ECU)(ecu));
1879 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1882 static
1883 void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
1885 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1888 static
1889 void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
1891 MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1894 void MC_(make_mem_defined) ( Addr a, SizeT len )
1896 PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
1897 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1898 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1899 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1900 ocache_sarp_Clear_Origins ( a, len );
1903 __attribute__((unused))
1904 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
1906 MC_(make_mem_defined)(a, len);
1909 /* For each byte in [a,a+len), if the byte is addressable, make it
1910 defined, but if it isn't addressable, leave it alone. In other
1911 words, a version of MC_(make_mem_defined) that doesn't mess with
1912 addressability. Low-performance implementation. */
1913 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1915 SizeT i;
1916 UChar vabits2;
1917 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1918 for (i = 0; i < len; i++) {
1919 vabits2 = get_vabits2( a+i );
1920 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1921 set_vabits2(a+i, VA_BITS2_DEFINED);
1922 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1923 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1929 /* Similarly (needed for mprotect handling ..) */
1930 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1932 SizeT i;
1933 UChar vabits2;
1934 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1935 for (i = 0; i < len; i++) {
1936 vabits2 = get_vabits2( a+i );
1937 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1938 set_vabits2(a+i, VA_BITS2_DEFINED);
1939 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1940 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1946 /* --- Block-copy permissions (needed for implementing realloc() and
1947 sys_mremap). --- */
1949 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1951 SizeT i, j;
1952 UChar vabits2, vabits8;
1953 Bool aligned, nooverlap;
1955 DEBUG("MC_(copy_address_range_state)\n");
1956 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);
1958 if (len == 0 || src == dst)
1959 return;
1961 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1962 nooverlap = src+len <= dst || dst+len <= src;
1964 if (nooverlap && aligned) {
1966 /* Vectorised fast case, when no overlap and suitably aligned */
1967 /* vector loop */
1968 i = 0;
1969 while (len >= 4) {
1970 vabits8 = get_vabits8_for_aligned_word32( src+i );
1971 set_vabits8_for_aligned_word32( dst+i, vabits8 );
1972 if (LIKELY(VA_BITS8_DEFINED == vabits8
1973 || VA_BITS8_UNDEFINED == vabits8
1974 || VA_BITS8_NOACCESS == vabits8)) {
1975 /* do nothing */
1976 } else {
1977 /* have to copy secondary map info */
1978 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1979 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1980 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1981 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1982 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1983 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1984 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1985 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1987 i += 4;
1988 len -= 4;
1990 /* fixup loop */
1991 while (len >= 1) {
1992 vabits2 = get_vabits2( src+i );
1993 set_vabits2( dst+i, vabits2 );
1994 if (VA_BITS2_PARTDEFINED == vabits2) {
1995 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1997 i++;
1998 len--;
2001 } else {
2003 /* We have to do things the slow way */
2004 if (src < dst) {
2005 for (i = 0, j = len-1; i < len; i++, j--) {
2006 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
2007 vabits2 = get_vabits2( src+j );
2008 set_vabits2( dst+j, vabits2 );
2009 if (VA_BITS2_PARTDEFINED == vabits2) {
2010 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
2015 if (src > dst) {
2016 for (i = 0; i < len; i++) {
2017 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
2018 vabits2 = get_vabits2( src+i );
2019 set_vabits2( dst+i, vabits2 );
2020 if (VA_BITS2_PARTDEFINED == vabits2) {
2021 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
2030 /*------------------------------------------------------------*/
2031 /*--- Origin tracking stuff - cache basics ---*/
2032 /*------------------------------------------------------------*/
2034 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
2035 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2037 Note that this implementation draws inspiration from the "origin
2038 tracking by value piggybacking" scheme described in "Tracking Bad
2039 Apples: Reporting the Origin of Null and Undefined Value Errors"
2040 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
2041 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
2042 implemented completely differently.
2044 Origin tags and ECUs -- about the shadow values
2045 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2047 This implementation tracks the defining point of all uninitialised
2048 values using so called "origin tags", which are 32-bit integers,
2049 rather than using the values themselves to encode the origins. The
2050 latter, so-called "value piggybacking", is what the OOPSLA07 paper
2051 describes.
2053 Origin tags, as tracked by the machinery below, are 32-bit unsigned
2054 ints (UInts), regardless of the machine's word size. Each tag
2055 comprises an upper 30-bit ECU field and a lower 2-bit
2056 'kind' field. The ECU field is a number given out by m_execontext
2057 and has a 1-1 mapping with ExeContext*s. An ECU can be used
2058 directly as an origin tag (otag), but in fact we also want the
2059 2-bit 'kind' field to record roughly where the tag came from.
2060 This helps print more understandable error messages for the
2061 user -- it has no other purpose. In summary:
2063 * Both ECUs and origin tags are represented as 32-bit words
2065 * m_execontext and the core-tool interface deal purely in ECUs.
2066 They have no knowledge of origin tags - that is a purely
2067 Memcheck-internal matter.
2069 * all valid ECUs have the lowest 2 bits zero and at least
2070 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2072 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2073 constants defined in mc_include.h.
2075 * to convert an otag back to an ECU, AND it with ~3
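   As a concrete (editor-added) illustration of those conversion rules,
   here is the same thing written out in plain C on generic 32-bit
   integers.  The helper names and the example 'kind' value are invented
   for exposition only; the real constants live in mc_include.h.

      #include <assert.h>
      #include <stdint.h>

      #define EXAMPLE_KIND 2u   /* hypothetical 2-bit MC_OKIND_-style value */

      static uint32_t ecu_to_otag ( uint32_t ecu, uint32_t kind )
      {
         /* valid ECUs have the bottom 2 bits clear and are nonzero */
         assert((ecu & 3u) == 0 && ecu != 0);
         assert(kind <= 3u);
         return ecu | kind;
      }

      static uint32_t otag_to_ecu ( uint32_t otag )
      {
         return otag & ~3u;   /* mask off the 2-bit 'kind' field */
      }

      static void otag_roundtrip_example ( void )
      {
         uint32_t ecu  = 0x1234u;                  /* bottom 2 bits are 0 */
         uint32_t otag = ecu_to_otag(ecu, EXAMPLE_KIND);
         assert(otag != 0);                        /* no valid otag is zero */
         assert(otag_to_ecu(otag) == ecu);
      }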
2077 One important fact is that no valid otag is zero. A zero otag is
2078 used by the implementation to indicate "no origin", which could
2079 mean that either the value is defined, or it is undefined but the
2080 implementation somehow managed to lose the origin.
2082 The ECU used for memory created by malloc etc is derived from the
2083 stack trace at the time the malloc etc happens. This means the
2084 mechanism can show the exact allocation point for heap-created
2085 uninitialised values.
2087 In contrast, it is simply too expensive to create a complete
2088 backtrace for each stack allocation. Therefore we merely use a
2089 depth-1 backtrace for stack allocations, which can be done once at
2090 translation time, rather than N times at run time. The result of
2091 this is that, for stack created uninitialised values, Memcheck can
2092 only show the allocating function, and not what called it.
2093 Furthermore, compilers tend to move the stack pointer just once at
2094 the start of the function, to allocate all locals, and so in fact
2095 the stack origin almost always simply points to the opening brace
2096 of the function. Net result is, for stack origins, the mechanism
2097 can tell you in which function the undefined value was created, but
2098 that's all. Users will need to carefully check all locals in the
2099 specified function.
2101 Shadowing registers and memory
2102 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2104 Memory is shadowed using a two level cache structure (ocacheL1 and
2105 ocacheL2). Memory references are first directed to ocacheL1. This
2106 is a traditional 2-way set associative cache with 32-byte lines and
2107 approximate LRU replacement within each set.
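   (Editor-added sketch.)  Using the constants defined further down in this
   file (OC_BITS_PER_LINE = 5, hence 32-byte lines with 8 otags each, and
   OC_N_SET_BITS = 20), an address decomposes into L1 cache coordinates as
   follows -- the same arithmetic as oc_line_offset and find_OCacheLine below:

      #include <stdint.h>

      static void oc_decompose ( uint64_t a, uint64_t* tag,
                                 uint64_t* setno, uint64_t* w32off )
      {
         *tag    = a & ~((uint64_t)(1u << 5) - 1); /* 32-byte-aligned line address */
         *setno  = (a >> 5) & ((1u << 20) - 1);    /* which set in ocacheL1 */
         *w32off = (a >> 2) & (8 - 1);             /* which 32-bit group in the line */
      }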
2109 A naive implementation would require storing one 32 bit otag for
2110 each byte of memory covered, a 4:1 space overhead. Instead, there
2111 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2112 that shows which of the 4 bytes have that shadow value and which
2113 have a shadow value of zero (indicating no origin). Hence a lot of
2114 space is saved, but the cost is that only one different origin per
2115 4 bytes of address space can be represented. This is a source of
2116 imprecision, but how much of a problem it really is remains to be
2117 seen.
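   (Editor-added sketch.)  A minimal model of that encoding, with invented
   names: bit i of the 4-bit 'descr' says whether byte i of the 4-byte group
   carries the shared otag, or has no origin at all.

      #include <stdint.h>

      typedef struct {
         uint32_t w32;    /* the single otag shared by this 4-byte group */
         uint8_t  descr;  /* low 4 bits: which bytes actually carry that otag */
      } WordOrigin;

      static uint32_t byte_origin ( const WordOrigin* wo, unsigned byteno /*0..3*/ )
      {
         return (wo->descr & (1u << byteno)) ? wo->w32 : 0u;  /* 0 == no origin */
      }

      /* Storing a second undefined byte with a different otag into the same
         group necessarily overwrites the shared otag -- exactly the source
         of imprecision described above. */
      static void note_undef_byte ( WordOrigin* wo, unsigned byteno, uint32_t otag )
      {
         wo->w32    = otag;
         wo->descr |= (uint8_t)(1u << byteno);
      }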
2119 A cache line that contains all zeroes ("no origins") contains no
2120 useful information, and can be ejected from the L1 cache "for
2121 free", in the sense that a read miss on the L1 causes a line of
2122 zeroes to be installed. However, ejecting a line containing
2123 nonzeroes risks losing origin information permanently. In order to
2124 prevent such lossage, ejected nonzero lines are placed in a
2125 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2126 lines. This can grow arbitrarily large, and so should ensure that
2127 Memcheck runs out of memory in preference to losing useful origin
2128 info due to cache size limitations.
2130 Shadowing registers is a bit tricky, because the shadow values are
2131 32 bits, regardless of the size of the register. That gives a
2132 problem for registers smaller than 32 bits. The solution is to
2133 find spaces in the guest state that are unused, and use those to
2134 shadow guest state fragments smaller than 32 bits. For example, on
2135 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
2136 shadow are allocated for the register's otag, then there are still
2137 12 bytes left over which could be used to shadow 3 other values.
2139 This implies there is some non-obvious mapping from guest state
2140 (start,length) pairs to the relevant shadow offset (for the origin
2141 tags). And it is unfortunately guest-architecture specific. The
2142 mapping is contained in mc_machine.c, which is quite lengthy but
2143 straightforward.
2145 Instrumenting the IR
2146 ~~~~~~~~~~~~~~~~~~~~
2148 Instrumentation is largely straightforward, and done by the
2149 functions schemeE and schemeS in mc_translate.c. These generate
2150 code for handling the origin tags of expressions (E) and statements
2151 (S) respectively. The rather strange names are a reference to the
2152 "compilation schemes" shown in Simon Peyton Jones' book "The
2153 Implementation of Functional Programming Languages" (Prentice Hall,
2154 1987, see
2155 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2157 schemeS merely arranges to move shadow values around the guest
2158 state to track the incoming IR. schemeE is largely trivial too.
2159 The only significant point is how to compute the otag corresponding
2160 to binary (or ternary, quaternary, etc) operator applications. The
2161 rule is simple: just take whichever value is larger (32-bit
2162 unsigned max). Constants get the special value zero. Hence this
2163 rule always propagates a nonzero (known) otag in preference to a
2164 zero (unknown, or more likely, value-is-defined) tag, as we want.
2165 If two different undefined values are inputs to a binary operator
2166 application, then which is propagated is arbitrary, but that
2167 doesn't matter, since the program is erroneous in using either of
2168 the values, and so there's no point in attempting to propagate
2169 both.
2171 Since constants are abstracted to (otag) zero, much of the
2172 instrumentation code can be folded out without difficulty by the
2173 generic post-instrumentation IR cleanup pass, using these rules:
2174 Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
2175 are both constants is evaluated at JIT time, followed by the
2176 resulting dead code removal. In practice this causes surprisingly
2177 few Max32Us to survive through to backend code generation.
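   (Editor-added sketch.)  The propagation rule for a binary application
   op(x,y) is therefore nothing more than an unsigned 32-bit max, with
   constant operands contributing zero:

      static uint32_t max32U ( uint32_t a, uint32_t b )
      {
         return a > b ? a : b;
      }

      /* otag(op(x,y)) = max32U(otag(x), otag(y)).  A constant operand
         contributes 0, so max32U(0, t) == t and the nonzero tag of an
         undefined operand always wins, as described above. */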
2179 Integration with the V-bits machinery
2180 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2182 This is again largely straightforward. Mostly the otag and V bits
2183 stuff are independent. The only point of interaction is when the V
2184 bits instrumenter creates a call to a helper function to report an
2185 uninitialised value error -- in that case it must first use schemeE
2186 to get hold of the origin tag expression for the value, and pass
2187 that to the helper too.
2189 There is the usual stuff to do with setting address range
2190 permissions. When memory is painted undefined, we must also know
2191 the origin tag to paint with, which involves some tedious plumbing,
2192 particularly to do with the fast case stack handlers. When memory
2193 is painted defined or noaccess then the origin tags must be forced
2194 to zero.
2196 One of the goals of the implementation was to ensure that the
2197 non-origin tracking mode isn't slowed down at all. To do this,
2198 various functions to do with memory permissions setting (again,
2199 mostly pertaining to the stack) are duplicated for the with- and
2200 without-otag case.
2202 Dealing with stack redzones, and the NIA cache
2203 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2205 This is one of the few non-obvious parts of the implementation.
2207 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2208 reserved area below the stack pointer, that can be used as scratch
2209 space by compiler generated code for functions. In the Memcheck
2210 sources this is referred to as the "stack redzone". The important
2211 thing here is that such redzones are considered volatile across
2212 function calls and returns. So Memcheck takes care to mark them as
2213 undefined for each call and return, on the afflicted platforms.
2214 Past experience shows this is essential in order to get reliable
2215 messages about uninitialised values that come from the stack.
2217 So the question is, when we paint a redzone undefined, what origin
2218 tag should we use for it? Consider a function f() calling g(). If
2219 we paint the redzone using an otag derived from the ExeContext of
2220 the CALL/BL instruction in f, then any errors in g causing it to
2221 use uninitialised values that happen to lie in the redzone, will be
2222 reported as having their origin in f. Which is highly confusing.
2224 The same applies for returns: if, on a return, we paint the redzone
2225 using an origin tag derived from the ExeContext of the RET/BLR
2226 instruction in g, then any later errors in f causing it to use
2227 uninitialised values in the redzone, will be reported as having
2228 their origin in g. Which is just as confusing.
2230 To do it right, in both cases we need to use an origin tag which
2231 pertains to the instruction which dynamically follows the CALL/BL
2232 or RET/BLR. In short, one derived from the NIA - the "next
2233 instruction address".
2235 To make this work, Memcheck's redzone-painting helper,
2236 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2237 NIA. It converts the NIA to a 1-element ExeContext, and uses that
2238 ExeContext's ECU as the basis for the otag used to paint the
2239 redzone. The expensive part of this is converting an NIA into an
2240 ECU, since this happens once for every call and every return. So
2241 we use a simple 511-line, 2-way set associative cache
2242 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2243 the cost out.
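   (Editor-added sketch, with invented names and details.)  The general
   shape of such a 2-way NIA-to-ECU cache might look as follows; the set
   count loosely mirrors the 511 figure quoted above, and nia_to_ecu_slow()
   stands in for the real NIA -> 1-element ExeContext -> ECU conversion:

      #include <stdint.h>

      #define N_SETS_ 511

      typedef struct { uint64_t nia0; uint32_t ecu0;
                       uint64_t nia1; uint32_t ecu1; } NiaEcuSet;

      static NiaEcuSet nia_cache[N_SETS_];

      extern uint32_t nia_to_ecu_slow ( uint64_t nia );  /* the expensive path */

      static uint32_t nia_to_ecu ( uint64_t nia )
      {
         NiaEcuSet* set = &nia_cache[nia % N_SETS_];
         if (set->nia0 == nia) return set->ecu0;     /* hit in way 0 */
         if (set->nia1 == nia) {                     /* hit in way 1: promote it */
            uint64_t tn = set->nia0;  uint32_t te = set->ecu0;
            set->nia0 = set->nia1;    set->ecu0 = set->ecu1;
            set->nia1 = tn;           set->ecu1 = te;
            return set->ecu0;
         }
         /* miss: demote way 0 and install the new mapping in way 0 */
         set->nia1 = set->nia0;  set->ecu1 = set->ecu0;
         set->nia0 = nia;        set->ecu0 = nia_to_ecu_slow(nia);
         return set->ecu0;
      }

   (A real implementation would also track whether each way is in use, so
   that an all-zero initial state cannot be mistaken for a hit.)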
2245 Further background comments
2246 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2248 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
2249 > it really just the address of the relevant ExeContext?
2251 Well, it's not the address, but a value which has a 1-1 mapping
2252 with ExeContexts, and is guaranteed not to be zero, since zero
2253 denotes (to memcheck) "unknown origin or defined value". So these
2254 UInts are just numbers starting at 4 and incrementing by 4; each
2255 ExeContext is given a number when it is created. (*** NOTE this
2256 confuses otags and ECUs; see comments above ***).
2258 Making these otags 32-bit regardless of the machine's word size
2259 makes the 64-bit implementation easier (next para). And it doesn't
2260 really limit us in any way, since for the tags to overflow would
2261 require that the program somehow caused 2^30-1 different
2262 ExeContexts to be created, in which case it is probably in deep
2263 trouble. Not to mention V will have soaked up many tens of
2264 gigabytes of memory merely to store them all.
2266 So having 64-bit origins doesn't really buy you anything, and has
2267 the following downsides:
2269 Suppose that instead, an otag is a UWord. This would mean that, on
2270 a 64-bit target,
2272 1. It becomes hard to shadow any element of guest state which is
2273 smaller than 8 bytes. To do so means you'd need to find some
2274 8-byte-sized hole in the guest state which you don't want to
2275 shadow, and use that instead to hold the otag. On ppc64, the
2276 condition code register(s) are split into 20 UChar sized pieces,
2277 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2278 and so that would entail finding 160 bytes somewhere else in the
2279 guest state.
2281 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2282 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2283 same) and so I had to look for 4 untracked otag-sized areas in
2284 the guest state to make that possible.
2286 The same problem exists of course when origin tags are only 32
2287 bits, but it's less extreme.
2289 2. (More compelling) it doubles the size of the origin shadow
2290 memory. Given that the shadow memory is organised as a fixed
2291 size cache, and that accuracy of tracking is limited by origins
2292 falling out the cache due to space conflicts, this isn't good.
2294 > Another question: is the origin tracking perfect, or are there
2295 > cases where it fails to determine an origin?
2297 It is imperfect for at least the following reasons, and
2298 probably more:
2300 * Insufficient capacity in the origin cache. When a line is
2301 evicted from the cache it is gone forever, and so subsequent
2302 queries for the line produce zero, indicating no origin
2303 information. Interestingly, a line containing all zeroes can be
2304 evicted "free" from the cache, since it contains no useful
2305 information, so there is scope perhaps for some cleverer cache
2306 management schemes. (*** NOTE, with the introduction of the
2307 second level origin tag cache, ocacheL2, this is no longer a
2308 problem. ***)
2310 * The origin cache only stores one otag per 32-bits of address
2311 space, plus 4 bits indicating which of the 4 bytes has that tag
2312 and which are considered defined. The result is that if two
2313 undefined bytes in the same word are stored in memory, the first
2314 stored byte's origin will be lost and replaced by the origin for
2315 the second byte.
2317 * Nonzero origin tags for defined values. Consider a binary
2318 operator application op(x,y). Suppose y is undefined (and so has
2319 a valid nonzero origin tag), and x is defined, but erroneously
2320 has a nonzero origin tag (defined values should have tag zero).
2321 If the erroneous tag has a numeric value greater than y's tag,
2322 then the rule for propagating origin tags through binary
2323 operations, which is simply to take the unsigned max of the two
2324 tags, will erroneously propagate x's tag rather than y's.
2326 * Some obscure uses of x86/amd64 byte registers can cause lossage
2327 or confusion of origins. %AH .. %DH are treated as different
2328 from, and unrelated to, their parent registers, %EAX .. %EDX.
2329 So some weird sequences like
2331 movb undefined-value, %AH
2332 movb defined-value, %AL
2333 .. use %AX or %EAX ..
2335 will cause the origin attributed to %AH to be ignored, since %AL,
2336 %AX, %EAX are treated as the same register, and %AH as a
2337 completely separate one.
2339 But having said all that, it actually seems to work fairly well in
2340 practice.
2343 static UWord stats_ocacheL1_find = 0;
2344 static UWord stats_ocacheL1_found_at_1 = 0;
2345 static UWord stats_ocacheL1_found_at_N = 0;
2346 static UWord stats_ocacheL1_misses = 0;
2347 static UWord stats_ocacheL1_lossage = 0;
2348 static UWord stats_ocacheL1_movefwds = 0;
2350 static UWord stats__ocacheL2_refs = 0;
2351 static UWord stats__ocacheL2_misses = 0;
2352 static UWord stats__ocacheL2_n_nodes_max = 0;
2354 /* Cache of 32-bit values, one every 32 bits of address space */
2356 #define OC_BITS_PER_LINE 5
2357 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2359 static INLINE UWord oc_line_offset ( Addr a ) {
2360 return (a >> 2) & (OC_W32S_PER_LINE - 1);
2362 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2363 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2366 #define OC_LINES_PER_SET 2
2368 #define OC_N_SET_BITS 20
2369 #define OC_N_SETS (1 << OC_N_SET_BITS)
2371 /* These settings give:
2372 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2373 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
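   (Editor-added check.)  Those figures follow directly from the constants
   above and the OCacheLine layout below: 2^20 sets x 2 lines x 8 otags x
   4 bytes = 67,108,864 useful bytes, and each line additionally carries a
   tag (8 or 4 bytes) plus 8 descr bytes.  Assuming no struct padding:

      #include <assert.h>
      #include <stdint.h>

      static void ocache_size_check ( void )
      {
         const uint64_t n_sets = 1ull << 20, lines_per_set = 2, w32s = 8;
         assert(n_sets * lines_per_set * w32s * 4 == 67108864ull);  /* useful */
         const uint64_t line64 = 8 + w32s*4 + w32s*1;  /* 48-byte line, 64-bit host */
         const uint64_t line32 = 4 + w32s*4 + w32s*1;  /* 44-byte line, 32-bit host */
         assert(n_sets * lines_per_set * line64 == 100663296ull);
         assert(n_sets * lines_per_set * line32 ==  92274688ull);
      }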
2376 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2379 typedef
2380 struct {
2381 Addr tag;
2382 UInt w32[OC_W32S_PER_LINE];
2383 UChar descr[OC_W32S_PER_LINE];
2385 OCacheLine;
2387 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2388 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2389 and 'z' if all the represented tags are zero. */
2390 static UChar classify_OCacheLine ( OCacheLine* line )
2392 UWord i;
2393 if (line->tag == 1/*invalid*/)
2394 return 'e'; /* EMPTY */
2395 tl_assert(is_valid_oc_tag(line->tag));
2396 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2397 tl_assert(0 == ((~0xF) & line->descr[i]));
2398 if (line->w32[i] > 0 && line->descr[i] > 0)
2399 return 'n'; /* NONZERO - contains useful info */
2401 return 'z'; /* ZERO - no useful info */
2404 typedef
2405 struct {
2406 OCacheLine line[OC_LINES_PER_SET];
2408 OCacheSet;
2410 typedef
2411 struct {
2412 OCacheSet set[OC_N_SETS];
2414 OCache;
2416 static OCache* ocacheL1 = NULL;
2417 static UWord ocacheL1_event_ctr = 0;
2419 static void init_ocacheL2 ( void ); /* fwds */
2420 static void init_OCache ( void )
2422 UWord line, set;
2423 tl_assert(MC_(clo_mc_level) >= 3);
2424 tl_assert(ocacheL1 == NULL);
2425 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2426 if (ocacheL1 == NULL) {
2427 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2428 sizeof(OCache) );
2430 tl_assert(ocacheL1 != NULL);
2431 for (set = 0; set < OC_N_SETS; set++) {
2432 for (line = 0; line < OC_LINES_PER_SET; line++) {
2433 ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2436 init_ocacheL2();
2439 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2441 OCacheLine tmp;
2442 stats_ocacheL1_movefwds++;
2443 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2444 tmp = set->line[lineno-1];
2445 set->line[lineno-1] = set->line[lineno];
2446 set->line[lineno] = tmp;
2449 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2450 UWord i;
2451 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2452 line->w32[i] = 0; /* NO ORIGIN */
2453 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2455 line->tag = tag;
2458 //////////////////////////////////////////////////////////////
2459 //// OCache backing store
2461 static OSet* ocacheL2 = NULL;
2463 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2464 return VG_(malloc)(cc, szB);
2466 static void ocacheL2_free ( void* v ) {
2467 VG_(free)( v );
2470 /* Stats: # nodes currently in tree */
2471 static UWord stats__ocacheL2_n_nodes = 0;
2473 static void init_ocacheL2 ( void )
2475 tl_assert(!ocacheL2);
2476 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2477 tl_assert(0 == offsetof(OCacheLine,tag));
2478 ocacheL2
2479 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2480 NULL, /* fast cmp */
2481 ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2482 stats__ocacheL2_n_nodes = 0;
2485 /* Find line with the given tag in the tree, or NULL if not found. */
2486 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2488 OCacheLine* line;
2489 tl_assert(is_valid_oc_tag(tag));
2490 stats__ocacheL2_refs++;
2491 line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2492 return line;
2495 /* Delete the line with the given tag from the tree, if it is present, and
2496 free up the associated memory. */
2497 static void ocacheL2_del_tag ( Addr tag )
2499 OCacheLine* line;
2500 tl_assert(is_valid_oc_tag(tag));
2501 stats__ocacheL2_refs++;
2502 line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2503 if (line) {
2504 VG_(OSetGen_FreeNode)(ocacheL2, line);
2505 tl_assert(stats__ocacheL2_n_nodes > 0);
2506 stats__ocacheL2_n_nodes--;
2510 /* Add a copy of the given line to the tree. It must not already be
2511 present. */
2512 static void ocacheL2_add_line ( OCacheLine* line )
2514 OCacheLine* copy;
2515 tl_assert(is_valid_oc_tag(line->tag));
2516 copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2517 *copy = *line;
2518 stats__ocacheL2_refs++;
2519 VG_(OSetGen_Insert)( ocacheL2, copy );
2520 stats__ocacheL2_n_nodes++;
2521 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2522 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2525 ////
2526 //////////////////////////////////////////////////////////////
2528 __attribute__((noinline))
2529 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2531 OCacheLine *victim, *inL2;
2532 UChar c;
2533 UWord line;
2534 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2535 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2536 UWord tag = a & tagmask;
2537 tl_assert(setno >= 0 && setno < OC_N_SETS);
2539 /* we already tried line == 0; skip therefore. */
2540 for (line = 1; line < OC_LINES_PER_SET; line++) {
2541 if (ocacheL1->set[setno].line[line].tag == tag) {
2542 if (line == 1) {
2543 stats_ocacheL1_found_at_1++;
2544 } else {
2545 stats_ocacheL1_found_at_N++;
2547 if (UNLIKELY(0 == (ocacheL1_event_ctr++
2548 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2549 moveLineForwards( &ocacheL1->set[setno], line );
2550 line--;
2552 return &ocacheL1->set[setno].line[line];
2556 /* A miss. Use the last slot. Implicitly this means we're
2557 ejecting the line in the last slot. */
2558 stats_ocacheL1_misses++;
2559 tl_assert(line == OC_LINES_PER_SET);
2560 line--;
2561 tl_assert(line > 0);
2563 /* First, move the to-be-ejected line to the L2 cache. */
2564 victim = &ocacheL1->set[setno].line[line];
2565 c = classify_OCacheLine(victim);
2566 switch (c) {
2567 case 'e':
2568 /* the line is empty (has invalid tag); ignore it. */
2569 break;
2570 case 'z':
2571 /* line contains zeroes. We must ensure the backing store is
2572 updated accordingly, either by copying the line there
2573 verbatim, or by ensuring it isn't present there. We
2574 choose the latter on the basis that it reduces the size of
2575 the backing store. */
2576 ocacheL2_del_tag( victim->tag );
2577 break;
2578 case 'n':
2579 /* line contains at least one real, useful origin. Copy it
2580 to the backing store. */
2581 stats_ocacheL1_lossage++;
2582 inL2 = ocacheL2_find_tag( victim->tag );
2583 if (inL2) {
2584 *inL2 = *victim;
2585 } else {
2586 ocacheL2_add_line( victim );
2588 break;
2589 default:
2590 tl_assert(0);
2593 /* Now we must reload the L1 cache from the backing tree, if
2594 possible. */
2595 tl_assert(tag != victim->tag); /* stay sane */
2596 inL2 = ocacheL2_find_tag( tag );
2597 if (inL2) {
2598 /* We're in luck. It's in the L2. */
2599 ocacheL1->set[setno].line[line] = *inL2;
2600 } else {
2601 /* Missed at both levels of the cache hierarchy. We have to
2602 declare it as full of zeroes (unknown origins). */
2603 stats__ocacheL2_misses++;
2604 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2607 /* Move it one forwards */
2608 moveLineForwards( &ocacheL1->set[setno], line );
2609 line--;
2611 return &ocacheL1->set[setno].line[line];
2614 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2616 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2617 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2618 UWord tag = a & tagmask;
2620 stats_ocacheL1_find++;
2622 if (OC_ENABLE_ASSERTIONS) {
2623 tl_assert(setno >= 0 && setno < OC_N_SETS);
2624 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2627 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2628 return &ocacheL1->set[setno].line[0];
2631 return find_OCacheLine_SLOW( a );
2634 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2636 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2637 //// Set the origins for a+0 .. a+7
2638 { OCacheLine* line;
2639 UWord lineoff = oc_line_offset(a);
2640 if (OC_ENABLE_ASSERTIONS) {
2641 tl_assert(lineoff >= 0
2642 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2644 line = find_OCacheLine( a );
2645 line->descr[lineoff+0] = 0xF;
2646 line->descr[lineoff+1] = 0xF;
2647 line->w32[lineoff+0] = otag;
2648 line->w32[lineoff+1] = otag;
2650 //// END inlined, specialised version of MC_(helperc_b_store8)
2654 /*------------------------------------------------------------*/
2655 /*--- Aligned fast case permission setters, ---*/
2656 /*--- for dealing with stacks ---*/
2657 /*------------------------------------------------------------*/
2659 /*--------------------- 32-bit ---------------------*/
2661 /* Nb: by "aligned" here we mean 4-byte aligned */
2663 static INLINE void make_aligned_word32_undefined ( Addr a )
2665 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);
2667 #ifndef PERF_FAST_STACK2
2668 make_mem_undefined(a, 4);
2669 #else
2671 UWord sm_off;
2672 SecMap* sm;
2674 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2675 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
2676 make_mem_undefined(a, 4);
2677 return;
2680 sm = get_secmap_for_writing_low(a);
2681 sm_off = SM_OFF(a);
2682 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2684 #endif
2687 static INLINE
2688 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2690 make_aligned_word32_undefined(a);
2691 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2692 //// Set the origins for a+0 .. a+3
2693 { OCacheLine* line;
2694 UWord lineoff = oc_line_offset(a);
2695 if (OC_ENABLE_ASSERTIONS) {
2696 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2698 line = find_OCacheLine( a );
2699 line->descr[lineoff] = 0xF;
2700 line->w32[lineoff] = otag;
2702 //// END inlined, specialised version of MC_(helperc_b_store4)
2705 static INLINE
2706 void make_aligned_word32_noaccess ( Addr a )
2708 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);
2710 #ifndef PERF_FAST_STACK2
2711 MC_(make_mem_noaccess)(a, 4);
2712 #else
2714 UWord sm_off;
2715 SecMap* sm;
2717 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2718 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
2719 MC_(make_mem_noaccess)(a, 4);
2720 return;
2723 sm = get_secmap_for_writing_low(a);
2724 sm_off = SM_OFF(a);
2725 sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2727 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2728 //// Set the origins for a+0 .. a+3.
2729 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2730 OCacheLine* line;
2731 UWord lineoff = oc_line_offset(a);
2732 if (OC_ENABLE_ASSERTIONS) {
2733 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2735 line = find_OCacheLine( a );
2736 line->descr[lineoff] = 0;
2738 //// END inlined, specialised version of MC_(helperc_b_store4)
2740 #endif
2743 /*--------------------- 64-bit ---------------------*/
2745 /* Nb: by "aligned" here we mean 8-byte aligned */
2747 static INLINE void make_aligned_word64_undefined ( Addr a )
2749 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);
2751 #ifndef PERF_FAST_STACK2
2752 make_mem_undefined(a, 8);
2753 #else
2755 UWord sm_off16;
2756 SecMap* sm;
2758 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2759 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
2760 make_mem_undefined(a, 8);
2761 return;
2764 sm = get_secmap_for_writing_low(a);
2765 sm_off16 = SM_OFF_16(a);
2766 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
2768 #endif
2771 static INLINE
2772 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2774 make_aligned_word64_undefined(a);
2775 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2776 //// Set the origins for a+0 .. a+7
2777 { OCacheLine* line;
2778 UWord lineoff = oc_line_offset(a);
2779 tl_assert(lineoff >= 0
2780 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2781 line = find_OCacheLine( a );
2782 line->descr[lineoff+0] = 0xF;
2783 line->descr[lineoff+1] = 0xF;
2784 line->w32[lineoff+0] = otag;
2785 line->w32[lineoff+1] = otag;
2787 //// END inlined, specialised version of MC_(helperc_b_store8)
2790 static INLINE
2791 void make_aligned_word64_noaccess ( Addr a )
2793 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);
2795 #ifndef PERF_FAST_STACK2
2796 MC_(make_mem_noaccess)(a, 8);
2797 #else
2799 UWord sm_off16;
2800 SecMap* sm;
2802 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2803 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
2804 MC_(make_mem_noaccess)(a, 8);
2805 return;
2808 sm = get_secmap_for_writing_low(a);
2809 sm_off16 = SM_OFF_16(a);
2810 sm->vabits16[sm_off16] = VA_BITS16_NOACCESS;
2812 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2813 //// Clear the origins for a+0 .. a+7.
2814 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2815 OCacheLine* line;
2816 UWord lineoff = oc_line_offset(a);
2817 tl_assert(lineoff >= 0
2818 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2819 line = find_OCacheLine( a );
2820 line->descr[lineoff+0] = 0;
2821 line->descr[lineoff+1] = 0;
2823 //// END inlined, specialised version of MC_(helperc_b_store8)
2825 #endif
2829 /*------------------------------------------------------------*/
2830 /*--- Stack pointer adjustment ---*/
2831 /*------------------------------------------------------------*/
2833 #ifdef PERF_FAST_STACK
2834 # define MAYBE_USED
2835 #else
2836 # define MAYBE_USED __attribute__((unused))
2837 #endif
2839 /*--------------- adjustment by 4 bytes ---------------*/
2841 MAYBE_USED
2842 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2844 UInt otag = ecu | MC_OKIND_STACK;
2845 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
2846 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2847 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2848 } else {
2849 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2853 MAYBE_USED
2854 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2856 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
2857 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2858 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2859 } else {
2860 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2864 MAYBE_USED
2865 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2867 PROF_EVENT(MCPE_DIE_MEM_STACK_4);
2868 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2869 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2870 } else {
2871 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2875 /*--------------- adjustment by 8 bytes ---------------*/
2877 MAYBE_USED
2878 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2880 UInt otag = ecu | MC_OKIND_STACK;
2881 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
2882 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2883 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2884 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2885 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2886 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2887 } else {
2888 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2892 MAYBE_USED
2893 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2895 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
2896 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2897 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2898 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2899 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2900 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2901 } else {
2902 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2906 MAYBE_USED
2907 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2909 PROF_EVENT(MCPE_DIE_MEM_STACK_8);
2910 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2911 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2912 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2913 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2914 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2915 } else {
2916 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2920 /*--------------- adjustment by 12 bytes ---------------*/
2922 MAYBE_USED
2923 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2925 UInt otag = ecu | MC_OKIND_STACK;
2926 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
2927 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2928 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2929 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2930 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2931 /* from previous test we don't have 8-alignment at offset +0,
2932 hence must have 8 alignment at offsets +4/-4. Hence safe to
2933 do 4 at +0 and then 8 at +4/. */
2934 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2935 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2936 } else {
2937 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2941 MAYBE_USED
2942 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2944 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
2945 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2946 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2947 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2948 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2949 /* from previous test we don't have 8-alignment at offset +0,
2950 hence must have 8 alignment at offsets +4/-4. Hence safe to
2951 do 4 at +0 and then 8 at +4/. */
2952 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2953 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2954 } else {
2955 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2959 MAYBE_USED
2960 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2962 PROF_EVENT(MCPE_DIE_MEM_STACK_12);
2963 /* Note the -12 in the test */
2964 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2965 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2966 -4. */
2967 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2968 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2969 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2970 /* We have 4-alignment at +0, but we don't have 8-alignment at
2971 -12. So we must have 8-alignment at -8. Hence do 4 at -12
2972 and then 8 at -8. */
2973 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2974 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2975 } else {
2976 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2980 /*--------------- adjustment by 16 bytes ---------------*/
2982 MAYBE_USED
2983 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2985 UInt otag = ecu | MC_OKIND_STACK;
2986 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
2987 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2988 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2989 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2990 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2991 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2992 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2993 Hence do 4 at +0, 8 at +4, 4 at +12. */
2994 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2995 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2996 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2997 } else {
2998 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
3002 MAYBE_USED
3003 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
3005 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
3006 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3007 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3008 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3009 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3010 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3011 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3012 Hence do 4 at +0, 8 at +4, 4 at +12. */
3013 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3014 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3015 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3016 } else {
3017 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
3021 MAYBE_USED
3022 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
3024 PROF_EVENT(MCPE_DIE_MEM_STACK_16);
3025 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3026 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
3027 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3028 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3029 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3030 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
3031 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3032 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3033 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3034 } else {
3035 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
3039 /*--------------- adjustment by 32 bytes ---------------*/
3041 MAYBE_USED
3042 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
3044 UInt otag = ecu | MC_OKIND_STACK;
3045 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3046 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3047 /* Straightforward */
3048 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3049 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3050 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3051 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3052 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3053 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3054 +0,+28. */
3055 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3056 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3057 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3058 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
3059 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
3060 } else {
3061 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
3065 MAYBE_USED
3066 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
3068 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3069 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3070 /* Straightforward */
3071 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3072 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3073 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3074 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3075 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3076 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3077 +0,+28. */
3078 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3079 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3080 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3081 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3082 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3083 } else {
3084 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3088 MAYBE_USED
3089 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3091 PROF_EVENT(MCPE_DIE_MEM_STACK_32);
3092 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3093 /* Straightforward */
3094 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3095 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3096 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3097 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3098 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3099 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
3100 4 at -32,-4. */
3101 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3102 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3103 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3104 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3105 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3106 } else {
3107 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3111 /*--------------- adjustment by 112 bytes ---------------*/
3113 MAYBE_USED
3114 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3116 UInt otag = ecu | MC_OKIND_STACK;
3117 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3118 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3119 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3120 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3121 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3122 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3123 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3124 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3125 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3126 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3127 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3128 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3129 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3130 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3131 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3132 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3133 } else {
3134 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3138 MAYBE_USED
3139 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3141 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3142 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3143 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3144 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3145 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3146 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3147 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3148 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3149 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3150 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3151 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3152 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3153 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3154 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3155 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3156 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3157 } else {
3158 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3162 MAYBE_USED
3163 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3165 PROF_EVENT(MCPE_DIE_MEM_STACK_112);
3166 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3167 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3168 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3169 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3170 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3171 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3172 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3173 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3174 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3175 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3176 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3177 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3178 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3179 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3180 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3181 } else {
3182 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3186 /*--------------- adjustment by 128 bytes ---------------*/
3188 MAYBE_USED
3189 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3191 UInt otag = ecu | MC_OKIND_STACK;
3192 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3193 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3194 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3195 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3196 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3197 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3198 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3199 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3200 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3201 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3202 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3203 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3204 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3205 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3206 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3207 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3208 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3209 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3210 } else {
3211 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3215 MAYBE_USED
3216 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3218 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3219 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3220 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3221 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3222 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3223 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3224 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3225 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3226 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3227 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3228 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3229 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3230 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3231 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3232 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3233 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3234 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3235 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3236 } else {
3237 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3241 MAYBE_USED
3242 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3244 PROF_EVENT(MCPE_DIE_MEM_STACK_128);
3245 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3246 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3247 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3248 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3249 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3250 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3251 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3252 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3253 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3254 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3255 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3256 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3257 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3258 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3259 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3260 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3261 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3262 } else {
3263 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3267 /*--------------- adjustment by 144 bytes ---------------*/
3269 MAYBE_USED
3270 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3272 UInt otag = ecu | MC_OKIND_STACK;
3273 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3274 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3275 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3276 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3277 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3278 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3279 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3280 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3281 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3282 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3283 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3284 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3285 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3286 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3287 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3288 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3289 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3290 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3291 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3292 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3293 } else {
3294 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3298 MAYBE_USED
3299 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3301 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3302 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3303 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3304 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3305 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3306 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3307 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3308 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3309 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3310 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3311 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3312 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3313 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3314 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3315 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3316 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3317 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3318 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3319 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3320 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3321 } else {
3322 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3326 MAYBE_USED
3327 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3329 PROF_EVENT(MCPE_DIE_MEM_STACK_144);
3330 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3331 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3332 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3333 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3334 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3335 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3336 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3337 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3338 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3339 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3340 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3341 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3342 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3343 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3344 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3345 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3346 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3347 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3348 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3349 } else {
3350 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3354 /*--------------- adjustment by 160 bytes ---------------*/
3356 MAYBE_USED
3357 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3359 UInt otag = ecu | MC_OKIND_STACK;
3360 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3361 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3362 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3363 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3364 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3365 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3366 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3367 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3368 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3369 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3370 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3371 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3372 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3373 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3374 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3375 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3376 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3377 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3378 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3379 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3380 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3381 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3382 } else {
3383 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3387 MAYBE_USED
3388 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3390 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3391 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3392 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3393 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3394 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3395 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3396 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3397 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3398 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3399 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3400 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3401 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3402 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3403 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3404 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3405 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3406 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3407 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3408 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3409 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3410 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3411 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3412 } else {
3413 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3417 MAYBE_USED
3418 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3420 PROF_EVENT(MCPE_DIE_MEM_STACK_160);
3421 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3422 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3423 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3424 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3425 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3426 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3427 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3428 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3429 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3430 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3431 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3432 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3433 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3434 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3435 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3436 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3437 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3438 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3439 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3440 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3441 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3442 } else {
3443 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3447 /*--------------- adjustment by N bytes ---------------*/
3449 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3451 UInt otag = ecu | MC_OKIND_STACK;
3452 PROF_EVENT(MCPE_NEW_MEM_STACK);
3453 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3456 static void mc_new_mem_stack ( Addr a, SizeT len )
3458 PROF_EVENT(MCPE_NEW_MEM_STACK);
3459 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3462 static void mc_die_mem_stack ( Addr a, SizeT len )
3464 PROF_EVENT(MCPE_DIE_MEM_STACK);
3465 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3469 /* The AMD64 ABI says:
3471 "The 128-byte area beyond the location pointed to by %rsp is considered
3472 to be reserved and shall not be modified by signal or interrupt
3473 handlers. Therefore, functions may use this area for temporary data
3474 that is not needed across function calls. In particular, leaf functions
3475 may use this area for their entire stack frame, rather than adjusting
3476 the stack pointer in the prologue and epilogue. This area is known as
3477 red zone [sic]."
3479 So after any call or return we need to mark this redzone as containing
3480 undefined values.
3482 Consider this: we're in function f. f calls g. g moves rsp down
3483 modestly (say 16 bytes) and writes stuff all over the red zone, making it
3484 defined. g returns. f is buggy and reads from parts of the red zone
3485 that it didn't write on. But because g filled that area in, f is going
3486 to be picking up defined V bits and so any errors from reading bits of
3487 the red zone it didn't write, will be missed. The only solution I could
3488 think of was to make the red zone undefined when g returns to f.
3490 This is in accordance with the ABI, which makes it clear the redzone
3491 is volatile across function calls.
3493 The problem occurs the other way round too: f could fill the RZ up
3494 with defined values and g could mistakenly read them. So the RZ
3495 also needs to be nuked on function calls.
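/* Editor's note: an illustrative sketch, not part of the original
   source. The "nuking" described above is carried out by the
   MAKE_STACK_UNINIT helpers defined later in this file. Assuming an
   amd64 guest whose stack pointer is 'rsp' just after a call or return
   (the actual call sites are generated by the instrumenter in
   mc_translate.c, so treat the details below as a sketch only): */
# if 0
   /* mark the 128-byte red zone [rsp-128, rsp) addressable but undefined */
   MC_(helperc_MAKE_STACK_UNINIT_128_no_o)( rsp - 128 );
   /* or, with origin tracking enabled, also pass the next instruction
      address so an origin tag can be attached to the undefined bytes */
   MC_(helperc_MAKE_STACK_UNINIT_w_o)( rsp - 128, 128, nia );
# endif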
3499 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3500 improved so as to have a lower miss rate. */
3502 static UWord stats__nia_cache_queries = 0;
3503 static UWord stats__nia_cache_misses = 0;
3505 typedef
3506 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
3507 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3508 WCacheEnt;
3510 #define N_NIA_TO_ECU_CACHE 511
3512 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3514 static void init_nia_to_ecu_cache ( void )
3516 UWord i;
3517 Addr zero_addr = 0;
3518 ExeContext* zero_ec;
3519 UInt zero_ecu;
3520 /* Fill all the slots with an entry for address zero, and the
3521 relevant otags accordingly. Hence the cache is initially filled
3522 with valid data. */
3523 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3524 tl_assert(zero_ec);
3525 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3526 tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3527 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3528 nia_to_ecu_cache[i].nia0 = zero_addr;
3529 nia_to_ecu_cache[i].ecu0 = zero_ecu;
3530 nia_to_ecu_cache[i].nia1 = zero_addr;
3531 nia_to_ecu_cache[i].ecu1 = zero_ecu;
3535 static inline UInt convert_nia_to_ecu ( Addr nia )
3537 UWord i;
3538 UInt ecu;
3539 ExeContext* ec;
3541 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3543 stats__nia_cache_queries++;
3544 i = nia % N_NIA_TO_ECU_CACHE;
3545 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3547 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3548 return nia_to_ecu_cache[i].ecu0;
3550 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3551 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3552 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3553 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3554 # undef SWAP
3555 return nia_to_ecu_cache[i].ecu0;
3558 stats__nia_cache_misses++;
3559 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3560 tl_assert(ec);
3561 ecu = VG_(get_ECU_from_ExeContext)(ec);
3562 tl_assert(VG_(is_plausible_ECU)(ecu));
3564 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3565 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3567 nia_to_ecu_cache[i].nia0 = nia;
3568 nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3569 return ecu;
3573 /* This marks the stack as addressable but undefined, after a call or
3574 return for a target that has an ABI defined stack redzone. It
3575 happens quite a lot and needs to be fast. This is the version for
3576 origin tracking. The non-origin-tracking version is below. */
3577 VG_REGPARM(3)
3578 void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
3580 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
3581 if (0)
3582 VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
3583 base, len, nia );
3585 UInt ecu = convert_nia_to_ecu ( nia );
3586 tl_assert(VG_(is_plausible_ECU)(ecu));
3588 UInt otag = ecu | MC_OKIND_STACK;
3590 # if 0
3591 /* Slow(ish) version, which is fairly easily seen to be correct.
3593 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3594 make_aligned_word64_undefined_w_otag(base + 0, otag);
3595 make_aligned_word64_undefined_w_otag(base + 8, otag);
3596 make_aligned_word64_undefined_w_otag(base + 16, otag);
3597 make_aligned_word64_undefined_w_otag(base + 24, otag);
3599 make_aligned_word64_undefined_w_otag(base + 32, otag);
3600 make_aligned_word64_undefined_w_otag(base + 40, otag);
3601 make_aligned_word64_undefined_w_otag(base + 48, otag);
3602 make_aligned_word64_undefined_w_otag(base + 56, otag);
3604 make_aligned_word64_undefined_w_otag(base + 64, otag);
3605 make_aligned_word64_undefined_w_otag(base + 72, otag);
3606 make_aligned_word64_undefined_w_otag(base + 80, otag);
3607 make_aligned_word64_undefined_w_otag(base + 88, otag);
3609 make_aligned_word64_undefined_w_otag(base + 96, otag);
3610 make_aligned_word64_undefined_w_otag(base + 104, otag);
3611 make_aligned_word64_undefined_w_otag(base + 112, otag);
3612 make_aligned_word64_undefined_w_otag(base + 120, otag);
3613 } else {
3614 MC_(make_mem_undefined_w_otag)(base, len, otag);
3616 # endif
3618 /* Idea is: go fast when
3619 * 8-aligned and length is 128
3620 * the sm is available in the main primary map
3621 * the address range falls entirely within a single secondary map
3622 If all those conditions hold, just update the V+A bits by writing
3623 directly into the vabits array. (If the sm was distinguished, this
3624 will make a copy and then write to it.)
3626 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3627 /* Now we know the address range is suitably sized and aligned. */
3628 UWord a_lo = (UWord)(base);
3629 UWord a_hi = (UWord)(base + 128 - 1);
3630 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3631 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3632 /* Now we know the entire range is within the main primary map. */
3633 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3634 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3635 if (LIKELY(pm_off_lo == pm_off_hi)) {
3636 /* Now we know that the entire address range falls within a
3637 single secondary map, and that that secondary 'lives' in
3638 the main primary map. */
3639 SecMap* sm = get_secmap_for_writing_low(a_lo);
3640 UWord v_off16 = SM_OFF_16(a_lo);
3641 UShort* p = &sm->vabits16[v_off16];
3642 p[ 0] = VA_BITS16_UNDEFINED;
3643 p[ 1] = VA_BITS16_UNDEFINED;
3644 p[ 2] = VA_BITS16_UNDEFINED;
3645 p[ 3] = VA_BITS16_UNDEFINED;
3646 p[ 4] = VA_BITS16_UNDEFINED;
3647 p[ 5] = VA_BITS16_UNDEFINED;
3648 p[ 6] = VA_BITS16_UNDEFINED;
3649 p[ 7] = VA_BITS16_UNDEFINED;
3650 p[ 8] = VA_BITS16_UNDEFINED;
3651 p[ 9] = VA_BITS16_UNDEFINED;
3652 p[10] = VA_BITS16_UNDEFINED;
3653 p[11] = VA_BITS16_UNDEFINED;
3654 p[12] = VA_BITS16_UNDEFINED;
3655 p[13] = VA_BITS16_UNDEFINED;
3656 p[14] = VA_BITS16_UNDEFINED;
3657 p[15] = VA_BITS16_UNDEFINED;
3658 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3659 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3660 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3661 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3662 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3663 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3664 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3665 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3666 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3667 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3668 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3669 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3670 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3671 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3672 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3673 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3674 return;
3679 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3680 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3681 /* Now we know the address range is suitably sized and aligned. */
3682 UWord a_lo = (UWord)(base);
3683 UWord a_hi = (UWord)(base + 288 - 1);
3684 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3685 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3686 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3687 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3688 if (LIKELY(pm_off_lo == pm_off_hi)) {
3689 /* Now we know that the entire address range falls within a
3690 single secondary map, and that that secondary 'lives' in
3691 the main primary map. */
3692 SecMap* sm = get_secmap_for_writing_low(a_lo);
3693 UWord v_off16 = SM_OFF_16(a_lo);
3694 UShort* p = &sm->vabits16[v_off16];
3695 p[ 0] = VA_BITS16_UNDEFINED;
3696 p[ 1] = VA_BITS16_UNDEFINED;
3697 p[ 2] = VA_BITS16_UNDEFINED;
3698 p[ 3] = VA_BITS16_UNDEFINED;
3699 p[ 4] = VA_BITS16_UNDEFINED;
3700 p[ 5] = VA_BITS16_UNDEFINED;
3701 p[ 6] = VA_BITS16_UNDEFINED;
3702 p[ 7] = VA_BITS16_UNDEFINED;
3703 p[ 8] = VA_BITS16_UNDEFINED;
3704 p[ 9] = VA_BITS16_UNDEFINED;
3705 p[10] = VA_BITS16_UNDEFINED;
3706 p[11] = VA_BITS16_UNDEFINED;
3707 p[12] = VA_BITS16_UNDEFINED;
3708 p[13] = VA_BITS16_UNDEFINED;
3709 p[14] = VA_BITS16_UNDEFINED;
3710 p[15] = VA_BITS16_UNDEFINED;
3711 p[16] = VA_BITS16_UNDEFINED;
3712 p[17] = VA_BITS16_UNDEFINED;
3713 p[18] = VA_BITS16_UNDEFINED;
3714 p[19] = VA_BITS16_UNDEFINED;
3715 p[20] = VA_BITS16_UNDEFINED;
3716 p[21] = VA_BITS16_UNDEFINED;
3717 p[22] = VA_BITS16_UNDEFINED;
3718 p[23] = VA_BITS16_UNDEFINED;
3719 p[24] = VA_BITS16_UNDEFINED;
3720 p[25] = VA_BITS16_UNDEFINED;
3721 p[26] = VA_BITS16_UNDEFINED;
3722 p[27] = VA_BITS16_UNDEFINED;
3723 p[28] = VA_BITS16_UNDEFINED;
3724 p[29] = VA_BITS16_UNDEFINED;
3725 p[30] = VA_BITS16_UNDEFINED;
3726 p[31] = VA_BITS16_UNDEFINED;
3727 p[32] = VA_BITS16_UNDEFINED;
3728 p[33] = VA_BITS16_UNDEFINED;
3729 p[34] = VA_BITS16_UNDEFINED;
3730 p[35] = VA_BITS16_UNDEFINED;
3731 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3732 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3733 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3734 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3735 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3736 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3737 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3738 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3739 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3740 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3741 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3742 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3743 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3744 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3745 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3746 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3747 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3748 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3749 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3750 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3751 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3752 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3753 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3754 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3755 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3756 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3757 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3758 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3759 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3760 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3761 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3762 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3763 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3764 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3765 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3766 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3767 return;
3772 /* else fall into slow case */
3773 MC_(make_mem_undefined_w_otag)(base, len, otag);
3777 /* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
3778 specialised for the non-origin-tracking case. */
3779 VG_REGPARM(2)
3780 void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
3782 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
3783 if (0)
3784 VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
3785 base, len );
3787 # if 0
3788 /* Slow(ish) version, which is fairly easily seen to be correct.
3790 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3791 make_aligned_word64_undefined(base + 0);
3792 make_aligned_word64_undefined(base + 8);
3793 make_aligned_word64_undefined(base + 16);
3794 make_aligned_word64_undefined(base + 24);
3796 make_aligned_word64_undefined(base + 32);
3797 make_aligned_word64_undefined(base + 40);
3798 make_aligned_word64_undefined(base + 48);
3799 make_aligned_word64_undefined(base + 56);
3801 make_aligned_word64_undefined(base + 64);
3802 make_aligned_word64_undefined(base + 72);
3803 make_aligned_word64_undefined(base + 80);
3804 make_aligned_word64_undefined(base + 88);
3806 make_aligned_word64_undefined(base + 96);
3807 make_aligned_word64_undefined(base + 104);
3808 make_aligned_word64_undefined(base + 112);
3809 make_aligned_word64_undefined(base + 120);
3810 } else {
3811 make_mem_undefined(base, len);
3813 # endif
3815 /* Idea is: go fast when
3816 * 8-aligned and length is 128
3817 * the sm is available in the main primary map
3818 * the address range falls entirely within a single secondary map
3819 If all those conditions hold, just update the V+A bits by writing
3820 directly into the vabits array. (If the sm was distinguished, this
3821 will make a copy and then write to it.)
3823 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3824 /* Now we know the address range is suitably sized and aligned. */
3825 UWord a_lo = (UWord)(base);
3826 UWord a_hi = (UWord)(base + 128 - 1);
3827 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3828 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3829 /* Now we know the entire range is within the main primary map. */
3830 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3831 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3832 if (LIKELY(pm_off_lo == pm_off_hi)) {
3833 /* Now we know that the entire address range falls within a
3834 single secondary map, and that that secondary 'lives' in
3835 the main primary map. */
3836 SecMap* sm = get_secmap_for_writing_low(a_lo);
3837 UWord v_off16 = SM_OFF_16(a_lo);
3838 UShort* p = &sm->vabits16[v_off16];
3839 p[ 0] = VA_BITS16_UNDEFINED;
3840 p[ 1] = VA_BITS16_UNDEFINED;
3841 p[ 2] = VA_BITS16_UNDEFINED;
3842 p[ 3] = VA_BITS16_UNDEFINED;
3843 p[ 4] = VA_BITS16_UNDEFINED;
3844 p[ 5] = VA_BITS16_UNDEFINED;
3845 p[ 6] = VA_BITS16_UNDEFINED;
3846 p[ 7] = VA_BITS16_UNDEFINED;
3847 p[ 8] = VA_BITS16_UNDEFINED;
3848 p[ 9] = VA_BITS16_UNDEFINED;
3849 p[10] = VA_BITS16_UNDEFINED;
3850 p[11] = VA_BITS16_UNDEFINED;
3851 p[12] = VA_BITS16_UNDEFINED;
3852 p[13] = VA_BITS16_UNDEFINED;
3853 p[14] = VA_BITS16_UNDEFINED;
3854 p[15] = VA_BITS16_UNDEFINED;
3855 return;
3860 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3861 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3862 /* Now we know the address range is suitably sized and aligned. */
3863 UWord a_lo = (UWord)(base);
3864 UWord a_hi = (UWord)(base + 288 - 1);
3865 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3866 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3867 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3868 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3869 if (LIKELY(pm_off_lo == pm_off_hi)) {
3870 /* Now we know that the entire address range falls within a
3871 single secondary map, and that that secondary 'lives' in
3872 the main primary map. */
3873 SecMap* sm = get_secmap_for_writing_low(a_lo);
3874 UWord v_off16 = SM_OFF_16(a_lo);
3875 UShort* p = &sm->vabits16[v_off16];
3876 p[ 0] = VA_BITS16_UNDEFINED;
3877 p[ 1] = VA_BITS16_UNDEFINED;
3878 p[ 2] = VA_BITS16_UNDEFINED;
3879 p[ 3] = VA_BITS16_UNDEFINED;
3880 p[ 4] = VA_BITS16_UNDEFINED;
3881 p[ 5] = VA_BITS16_UNDEFINED;
3882 p[ 6] = VA_BITS16_UNDEFINED;
3883 p[ 7] = VA_BITS16_UNDEFINED;
3884 p[ 8] = VA_BITS16_UNDEFINED;
3885 p[ 9] = VA_BITS16_UNDEFINED;
3886 p[10] = VA_BITS16_UNDEFINED;
3887 p[11] = VA_BITS16_UNDEFINED;
3888 p[12] = VA_BITS16_UNDEFINED;
3889 p[13] = VA_BITS16_UNDEFINED;
3890 p[14] = VA_BITS16_UNDEFINED;
3891 p[15] = VA_BITS16_UNDEFINED;
3892 p[16] = VA_BITS16_UNDEFINED;
3893 p[17] = VA_BITS16_UNDEFINED;
3894 p[18] = VA_BITS16_UNDEFINED;
3895 p[19] = VA_BITS16_UNDEFINED;
3896 p[20] = VA_BITS16_UNDEFINED;
3897 p[21] = VA_BITS16_UNDEFINED;
3898 p[22] = VA_BITS16_UNDEFINED;
3899 p[23] = VA_BITS16_UNDEFINED;
3900 p[24] = VA_BITS16_UNDEFINED;
3901 p[25] = VA_BITS16_UNDEFINED;
3902 p[26] = VA_BITS16_UNDEFINED;
3903 p[27] = VA_BITS16_UNDEFINED;
3904 p[28] = VA_BITS16_UNDEFINED;
3905 p[29] = VA_BITS16_UNDEFINED;
3906 p[30] = VA_BITS16_UNDEFINED;
3907 p[31] = VA_BITS16_UNDEFINED;
3908 p[32] = VA_BITS16_UNDEFINED;
3909 p[33] = VA_BITS16_UNDEFINED;
3910 p[34] = VA_BITS16_UNDEFINED;
3911 p[35] = VA_BITS16_UNDEFINED;
3912 return;
3917 /* else fall into slow case */
3918 make_mem_undefined(base, len);
3922 /* And this is an even more specialised case, for the case where there
3923 is no origin tracking, and the length is 128. */
3924 VG_REGPARM(1)
3925 void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
3927 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
3928 if (0)
3929 VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );
3931 # if 0
3932 /* Slow(ish) version, which is fairly easily seen to be correct.
3934 if (LIKELY( VG_IS_8_ALIGNED(base) )) {
3935 make_aligned_word64_undefined(base + 0);
3936 make_aligned_word64_undefined(base + 8);
3937 make_aligned_word64_undefined(base + 16);
3938 make_aligned_word64_undefined(base + 24);
3940 make_aligned_word64_undefined(base + 32);
3941 make_aligned_word64_undefined(base + 40);
3942 make_aligned_word64_undefined(base + 48);
3943 make_aligned_word64_undefined(base + 56);
3945 make_aligned_word64_undefined(base + 64);
3946 make_aligned_word64_undefined(base + 72);
3947 make_aligned_word64_undefined(base + 80);
3948 make_aligned_word64_undefined(base + 88);
3950 make_aligned_word64_undefined(base + 96);
3951 make_aligned_word64_undefined(base + 104);
3952 make_aligned_word64_undefined(base + 112);
3953 make_aligned_word64_undefined(base + 120);
3954 } else {
3955 make_mem_undefined(base, 128);
3957 # endif
3959 /* Idea is: go fast when
3960 * 16-aligned and length is 128
3961 * the sm is available in the main primary map
3962 * the address range falls entirely within a single secondary map
3963 If all those conditions hold, just update the V+A bits by writing
3964 directly into the vabits array. (If the sm was distinguished, this
3965 will make a copy and then write to it.)
3967 Typically this applies to amd64 'ret' instructions, since RSP is
3968 16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
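/* Editor's note -- the alignment arithmetic spelled out, assuming the
   standard amd64-ELF calling convention: immediately before a 'call',
   RSP is 16-aligned; the call pushes an 8-byte return address, so on
   entry to the callee RSP == 8 (mod 16); the matching 'ret' pops it
   again, so after 'ret' RSP is back to 0 (mod 16). A redzone base
   derived from RSP is therefore 16-aligned after 'ret' (this fast
   path) and 8 (mod 16) after 'call' (the fast path that follows). */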
3970 if (LIKELY( VG_IS_16_ALIGNED(base) )) {
3971 /* Now we know the address range is suitably sized and aligned. */
3972 UWord a_lo = (UWord)(base);
3973 UWord a_hi = (UWord)(base + 128 - 1);
3974 /* FIXME: come up with a sane story on the wraparound case
3975 (which of course cannot happen, but still..) */
3976 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
3977 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3978 /* Now we know the entire range is within the main primary map. */
3979 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3980 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3981 if (LIKELY(pm_off_lo == pm_off_hi)) {
3982 /* Now we know that the entire address range falls within a
3983 single secondary map, and that that secondary 'lives' in
3984 the main primary map. */
3985 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
3986 SecMap* sm = get_secmap_for_writing_low(a_lo);
3987 UWord v_off = SM_OFF(a_lo);
3988 UInt* w32 = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]);
3989 w32[ 0] = VA_BITS32_UNDEFINED;
3990 w32[ 1] = VA_BITS32_UNDEFINED;
3991 w32[ 2] = VA_BITS32_UNDEFINED;
3992 w32[ 3] = VA_BITS32_UNDEFINED;
3993 w32[ 4] = VA_BITS32_UNDEFINED;
3994 w32[ 5] = VA_BITS32_UNDEFINED;
3995 w32[ 6] = VA_BITS32_UNDEFINED;
3996 w32[ 7] = VA_BITS32_UNDEFINED;
3997 return;
4002 /* The same, but for when base is 8 % 16, which is the situation
4003 with RSP for amd64-ELF immediately after call instructions.
4005 if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
4006 /* Now we know the address range is suitably sized and aligned. */
4007 UWord a_lo = (UWord)(base);
4008 UWord a_hi = (UWord)(base + 128 - 1);
4009 /* FIXME: come up with a sane story on the wraparound case
4010 (which of course cannot happen, but still..) */
4011 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
4012 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
4013 /* Now we know the entire range is within the main primary map. */
4014 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4015 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4016 if (LIKELY(pm_off_lo == pm_off_hi)) {
4017 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
4018 /* Now we know that the entire address range falls within a
4019 single secondary map, and that that secondary 'lives' in
4020 the main primary map. */
4021 SecMap* sm = get_secmap_for_writing_low(a_lo);
4022 UWord v_off16 = SM_OFF_16(a_lo);
4023 UShort* w16 = &sm->vabits16[v_off16];
4024 UInt* w32 = ASSUME_ALIGNED(UInt*, &w16[1]);
4025 /* The following assertion is commented out for obvious
4026 performance reasons, but was verified as valid when
4027 running the entire testsuite and also Firefox. */
4028 /* tl_assert(VG_IS_4_ALIGNED(w32)); */
4029 w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
4030 w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
4031 w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
4032 w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
4033 w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
4034 w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
4035 w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
4036 w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
4037 w16[15] = VA_BITS16_UNDEFINED; // w16[15]
4038 return;
4043 /* else fall into slow case */
4044 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
4045 make_mem_undefined(base, 128);
4049 /*------------------------------------------------------------*/
4050 /*--- Checking memory ---*/
4051 /*------------------------------------------------------------*/
4053 typedef
4054 enum {
4055 MC_Ok = 5,
4056 MC_AddrErr = 6,
4057 MC_ValueErr = 7
4059 MC_ReadResult;
4062 /* Check permissions for address range. If inadequate permissions
4063 exist, *bad_addr is set to the offending address, so the caller can
4064 know what it is. */
4066 /* Returns True if [a .. a+len) is not addressable. Otherwise,
4067 returns False, and if bad_addr is non-NULL, sets *bad_addr to
4068 indicate the lowest failing address. Functions below are
4069 similar. */
4070 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
4072 SizeT i;
4073 UWord vabits2;
4075 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
4076 for (i = 0; i < len; i++) {
4077 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
4078 vabits2 = get_vabits2(a);
4079 if (VA_BITS2_NOACCESS != vabits2) {
4080 if (bad_addr != NULL) *bad_addr = a;
4081 return False;
4083 a++;
4085 return True;
4088 static Bool is_mem_addressable ( Addr a, SizeT len,
4089 /*OUT*/Addr* bad_addr )
4091 SizeT i;
4092 UWord vabits2;
4094 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
4095 for (i = 0; i < len; i++) {
4096 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
4097 vabits2 = get_vabits2(a);
4098 if (VA_BITS2_NOACCESS == vabits2) {
4099 if (bad_addr != NULL) *bad_addr = a;
4100 return False;
4102 a++;
4104 return True;
4107 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
4108 /*OUT*/Addr* bad_addr,
4109 /*OUT*/UInt* otag )
4111 SizeT i;
4112 UWord vabits2;
4114 PROF_EVENT(MCPE_IS_MEM_DEFINED);
4115 DEBUG("is_mem_defined\n");
4117 if (otag) *otag = 0;
4118 if (bad_addr) *bad_addr = 0;
4119 for (i = 0; i < len; i++) {
4120 PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
4121 vabits2 = get_vabits2(a);
4122 if (VA_BITS2_DEFINED != vabits2) {
4123 // Error! Nb: Report addressability errors in preference to
4124 // definedness errors. And don't report definedness errors unless
4125 // --undef-value-errors=yes.
4126 if (bad_addr) {
4127 *bad_addr = a;
4129 if (VA_BITS2_NOACCESS == vabits2) {
4130 return MC_AddrErr;
4132 if (MC_(clo_mc_level) >= 2) {
4133 if (otag && MC_(clo_mc_level) == 3) {
4134 *otag = MC_(helperc_b_load1)( a );
4136 return MC_ValueErr;
4139 a++;
4141 return MC_Ok;
4145 /* Like is_mem_defined but doesn't give up at the first uninitialised
4146 byte -- the entire range is always checked. This is important for
4147 detecting errors in the case where a checked range strays into
4148 invalid memory, but that fact is not detected by the ordinary
4149 is_mem_defined(), because of an undefined section that precedes the
4150 out of range section, possibly as a result of an alignment hole in
4151 the checked data. This version always checks the entire range and
4152 can report both a definedness and an accessibility error, if
4153 necessary. */
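/* Editor's note -- a worked example of the above (hypothetical layout,
   for illustration only). Suppose a syscall wrapper checks 16 bytes of
   a 12-byte heap block whose bytes at offsets 4..7 are an uninitialised
   alignment hole:

      offset:  0..3      4..7        8..11     12..15
      state:   defined   undefined   defined   unaddressable

   is_mem_defined() stops at offset 4 and reports only a definedness
   error, so the far more serious out-of-range access at offset 12 goes
   unreported. is_mem_defined_comprehensive() keeps scanning: it notes
   the first definedness error at offset 4 and still reports the
   addressability error at offset 12. */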
4154 static void is_mem_defined_comprehensive (
4155 Addr a, SizeT len,
4156 /*OUT*/Bool* errorV, /* is there a definedness err? */
4157 /*OUT*/Addr* bad_addrV, /* if so where? */
4158 /*OUT*/UInt* otagV, /* and what's its otag? */
4159 /*OUT*/Bool* errorA, /* is there an addressability err? */
4160 /*OUT*/Addr* bad_addrA /* if so where? */
4163 SizeT i;
4164 UWord vabits2;
4165 Bool already_saw_errV = False;
4167 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
4168 DEBUG("is_mem_defined_comprehensive\n");
4170 tl_assert(!(*errorV || *errorA));
4172 for (i = 0; i < len; i++) {
4173 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
4174 vabits2 = get_vabits2(a);
4175 switch (vabits2) {
4176 case VA_BITS2_DEFINED:
4177 a++;
4178 break;
4179 case VA_BITS2_UNDEFINED:
4180 case VA_BITS2_PARTDEFINED:
4181 if (!already_saw_errV) {
4182 *errorV = True;
4183 *bad_addrV = a;
4184 if (MC_(clo_mc_level) == 3) {
4185 *otagV = MC_(helperc_b_load1)( a );
4186 } else {
4187 *otagV = 0;
4189 already_saw_errV = True;
4191 a++; /* keep going */
4192 break;
4193 case VA_BITS2_NOACCESS:
4194 *errorA = True;
4195 *bad_addrA = a;
4196 return; /* give up now. */
4197 default:
4198 tl_assert(0);
4204 /* Check a zero-terminated ascii string. Tricky -- don't want to
4205 examine the actual bytes, to find the end, until we're sure it is
4206 safe to do so. */
4208 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
4210 UWord vabits2;
4212 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
4213 DEBUG("mc_is_defined_asciiz\n");
4215 if (otag) *otag = 0;
4216 if (bad_addr) *bad_addr = 0;
4217 while (True) {
4218 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
4219 vabits2 = get_vabits2(a);
4220 if (VA_BITS2_DEFINED != vabits2) {
4221 // Error! Nb: Report addressability errors in preference to
4222 // definedness errors. And don't report definedness errors unless
4223 // --undef-value-errors=yes.
4224 if (bad_addr) {
4225 *bad_addr = a;
4227 if (VA_BITS2_NOACCESS == vabits2) {
4228 return MC_AddrErr;
4230 if (MC_(clo_mc_level) >= 2) {
4231 if (otag && MC_(clo_mc_level) == 3) {
4232 *otag = MC_(helperc_b_load1)( a );
4234 return MC_ValueErr;
4237 /* Ok, a is safe to read. */
4238 if (* ((UChar*)a) == 0) {
4239 return MC_Ok;
4241 a++;
4246 /*------------------------------------------------------------*/
4247 /*--- Memory event handlers ---*/
4248 /*------------------------------------------------------------*/
4250 static
4251 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
4252 Addr base, SizeT size )
4254 Addr bad_addr;
4255 Bool ok = is_mem_addressable ( base, size, &bad_addr );
4257 if (!ok) {
4258 switch (part) {
4259 case Vg_CoreSysCall:
4260 MC_(record_memparam_error) ( tid, bad_addr,
4261 /*isAddrErr*/True, s, 0/*otag*/ );
4262 break;
4264 case Vg_CoreSignal:
4265 MC_(record_core_mem_error)( tid, s );
4266 break;
4268 default:
4269 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
4274 static
4275 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
4276 Addr base, SizeT size )
4278 UInt otag = 0;
4279 Addr bad_addr;
4280 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
4282 if (MC_Ok != res) {
4283 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4285 switch (part) {
4286 case Vg_CoreSysCall:
4287 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4288 isAddrErr ? 0 : otag );
4289 break;
4291 case Vg_CoreSysCallArgInMem:
4292 MC_(record_regparam_error) ( tid, s, otag );
4293 break;
4295 /* If we're being asked to jump to a silly address, record an error
4296 message before potentially crashing the entire system. */
4297 case Vg_CoreTranslate:
4298 MC_(record_jump_error)( tid, bad_addr );
4299 break;
4301 default:
4302 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
4307 static
4308 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
4309 const HChar* s, Addr str )
4311 MC_ReadResult res;
4312 Addr bad_addr = 0; // shut GCC up
4313 UInt otag = 0;
4315 tl_assert(part == Vg_CoreSysCall);
4316 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
4317 if (MC_Ok != res) {
4318 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4319 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4320 isAddrErr ? 0 : otag );
4324 /* Handling of mmap and mprotect is not as simple as it seems.
4326 The underlying semantics are that memory obtained from mmap is
4327 always initialised, but may be inaccessible. And changes to the
4328 protection of memory do not change its contents and hence not its
4329 definedness state. Problem is we can't model
4330 inaccessible-but-with-some-definedness state; once we mark memory
4331 as inaccessible we lose all info about definedness, and so can't
4332 restore that if it is later made accessible again.
4334 One obvious thing to do is this:
4336 mmap/mprotect NONE -> noaccess
4337 mmap/mprotect other -> defined
4339 The problem case here is: taking accessible memory, writing
4340 uninitialised data to it, mprotecting it NONE and later mprotecting
4341 it back to some accessible state causes the undefinedness to be
4342 lost.
4344 A better proposal is:
4346 (1) mmap NONE -> make noaccess
4347 (2) mmap other -> make defined
4349 (3) mprotect NONE -> # no change
4350 (4) mprotect other -> change any "noaccess" to "defined"
4352 (2) is OK because memory newly obtained from mmap really is defined
4353 (zeroed out by the kernel -- doing anything else would
4354 constitute a massive security hole.)
4356 (1) is OK because the only way to make the memory usable is via
4357 (4), in which case we also wind up correctly marking it all as
4358 defined.
4360 (3) is the weak case. We choose not to change memory state.
4361 (presumably the range is in some mixture of "defined" and
4362 "undefined", viz, accessible but with arbitrary V bits). Doing
4363 nothing means we retain the V bits, so that if the memory is
4364 later mprotected "other", the V bits remain unchanged, so there
4365 can be no false negatives. The bad effect is that if there's
4366 an access in the area, then MC cannot warn; but at least we'll
4367 get a SEGV to show, so it's better than nothing.
4369 Consider the sequence (3) followed by (4). Any memory that was
4370 "defined" or "undefined" previously retains its state (as
4371 required). Any memory that was "noaccess" before can only have
4372 been made that way by (1), and so it's OK to change it to
4373 "defined".
4375 See https://bugs.kde.org/show_bug.cgi?id=205541
4376 and https://bugs.kde.org/show_bug.cgi?id=210268
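/* Editor's note -- an illustrative sketch of the problem case above
   (hypothetical client code; 'read_uninitialised_value' and 'use' are
   made-up names): */
# if 0
   p = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
            MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);    /* rule (2): defined   */
   p[0] = read_uninitialised_value();             /* p[0] now undefined  */
   mprotect(p, 4096, PROT_NONE);                  /* rule (3): no change */
   mprotect(p, 4096, PROT_READ|PROT_WRITE);       /* rule (4): only
                                                     noaccess -> defined */
   use(p[0]);   /* still reported as a use of an uninitialised value;
                   the naive "mprotect other -> defined" rule would have
                   silently lost this error */
# endif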
4378 static
4379 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4380 ULong di_handle )
4382 if (rr || ww || xx) {
4383 /* (2) mmap/mprotect other -> defined */
4384 MC_(make_mem_defined)(a, len);
4385 } else {
4386 /* (1) mmap/mprotect NONE -> noaccess */
4387 MC_(make_mem_noaccess)(a, len);
4391 static
4392 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4394 if (rr || ww || xx) {
4395 /* (4) mprotect other -> change any "noaccess" to "defined" */
4396 make_mem_defined_if_noaccess(a, len);
4397 } else {
4398 /* (3) mprotect NONE -> # no change */
4399 /* do nothing */
4404 static
4405 void mc_new_mem_startup( Addr a, SizeT len,
4406 Bool rr, Bool ww, Bool xx, ULong di_handle )
4408 // Because code is defined, initialised variables get put in the data
4409 // segment and are defined, and uninitialised variables get put in the
4410 // bss segment and are auto-zeroed (and so defined).
4412 // It's possible that there will be padding between global variables.
4413 // This will also be auto-zeroed, and marked as defined by Memcheck. If
4414 // a program uses it, Memcheck will not complain. This is arguably a
4415 // false negative, but it's a grey area -- the behaviour is defined (the
4416 // padding is zeroed) but it's probably not what the user intended. And
4417 // we can't avoid it.
4419 // Note: we generally ignore RWX permissions, because we can't track them
4420 // without requiring more than one A bit which would slow things down a
4421 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
4422 // So we mark any such pages as "unaddressable".
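   // Editor's note -- an illustrative sketch of the padding case above
   // (hypothetical client code):
# if 0
   char tag[3];      /* likely padded out to a 4-byte boundary, so one   */
   int  counter;     /* zeroed, "defined" padding byte sits after tag[2] */
   x = tag[3];       /* out of bounds, but lands on addressable, defined
                        padding: Memcheck stays silent -- the grey-area
                        false negative described above */
# endif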
4423 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4424 a, (ULong)len, rr, ww, xx);
4425 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4428 static
4429 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4431 MC_(make_mem_defined)(a, len);
4435 /*------------------------------------------------------------*/
4436 /*--- Register event handlers ---*/
4437 /*------------------------------------------------------------*/
4439 /* Try and get a nonzero origin for the guest state section of thread
4440 tid characterised by (offset,size). Return 0 if nothing to show
4441 for it. */
4442 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4443 Int offset, SizeT size )
4445 Int sh2off;
4446 UInt area[3];
4447 UInt otag;
4448 sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4449 if (sh2off == -1)
4450 return 0; /* This piece of guest state is not tracked */
4451 tl_assert(sh2off >= 0);
4452 tl_assert(0 == (sh2off % 4));
4453 area[0] = 0x31313131;
4454 area[2] = 0x27272727;
4455 VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4456 tl_assert(area[0] == 0x31313131);
4457 tl_assert(area[2] == 0x27272727);
4458 otag = area[1];
4459 return otag;
4463 /* When some chunk of guest state is written, mark the corresponding
4464 shadow area as valid. This is used to initialise arbitrarily large
4465 chunks of guest state, hence the _SIZE value, which has to be as
4466 big as the biggest guest state.
4468 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4469 PtrdiffT offset, SizeT size)
4471 # define MAX_REG_WRITE_SIZE 1744
4472 UChar area[MAX_REG_WRITE_SIZE];
4473 tl_assert(size <= MAX_REG_WRITE_SIZE);
4474 VG_(memset)(area, V_BITS8_DEFINED, size);
4475 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4476 # undef MAX_REG_WRITE_SIZE
4479 static
4480 void mc_post_reg_write_clientcall ( ThreadId tid,
4481 PtrdiffT offset, SizeT size, Addr f)
4483 mc_post_reg_write(/*dummy*/0, tid, offset, size);
4486 /* Look at the definedness of the guest's shadow state for
4487 [offset, offset+len). If any part of that is undefined, record
4488 a parameter error.
4490 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4491 PtrdiffT offset, SizeT size)
4493 Int i;
4494 Bool bad;
4495 UInt otag;
4497 UChar area[16];
4498 tl_assert(size <= 16);
4500 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4502 bad = False;
4503 for (i = 0; i < size; i++) {
4504 if (area[i] != V_BITS8_DEFINED) {
4505 bad = True;
4506 break;
4510 if (!bad)
4511 return;
4513 /* We've found some undefinedness. See if we can also find an
4514 origin for it. */
4515 otag = mb_get_origin_for_guest_offset( tid, offset, size );
4516 MC_(record_regparam_error) ( tid, s, otag );
4520 /*------------------------------------------------------------*/
4521 /*--- Register-memory event handlers ---*/
4522 /*------------------------------------------------------------*/
4524 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
4525 PtrdiffT guest_state_offset, SizeT size )
4527 SizeT i;
4528 UChar vbits8;
4529 Int offset;
4530 UInt d32;
4532 /* Slow loop. */
4533 for (i = 0; i < size; i++) {
4534 get_vbits8( a+i, &vbits8 );
4535 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
4536 1, &vbits8 );
4539 if (MC_(clo_mc_level) != 3)
4540 return;
4542 /* Track origins. */
4543 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4544 if (offset == -1)
4545 return;
4547 switch (size) {
4548 case 1:
4549 d32 = MC_(helperc_b_load1)( a );
4550 break;
4551 case 2:
4552 d32 = MC_(helperc_b_load2)( a );
4553 break;
4554 case 4:
4555 d32 = MC_(helperc_b_load4)( a );
4556 break;
4557 case 8:
4558 d32 = MC_(helperc_b_load8)( a );
4559 break;
4560 case 16:
4561 d32 = MC_(helperc_b_load16)( a );
4562 break;
4563 case 32:
4564 d32 = MC_(helperc_b_load32)( a );
4565 break;
4566 default:
4567 tl_assert(0);
4570 VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
4573 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
4574 PtrdiffT guest_state_offset, Addr a,
4575 SizeT size )
4577 SizeT i;
4578 UChar vbits8;
4579 Int offset;
4580 UInt d32;
4582 /* Slow loop. */
4583 for (i = 0; i < size; i++) {
4584 VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
4585 guest_state_offset+i, 1 );
4586 set_vbits8( a+i, vbits8 );
4589 if (MC_(clo_mc_level) != 3)
4590 return;
4592 /* Track origins. */
4593 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4594 if (offset == -1)
4595 return;
4597 VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
4598 switch (size) {
4599 case 1:
4600 MC_(helperc_b_store1)( a, d32 );
4601 break;
4602 case 2:
4603 MC_(helperc_b_store2)( a, d32 );
4604 break;
4605 case 4:
4606 MC_(helperc_b_store4)( a, d32 );
4607 break;
4608 case 8:
4609 MC_(helperc_b_store8)( a, d32 );
4610 break;
4611 case 16:
4612 MC_(helperc_b_store16)( a, d32 );
4613 break;
4614 case 32:
4615 MC_(helperc_b_store32)( a, d32 );
4616 break;
4617 default:
4618 tl_assert(0);
4623 /*------------------------------------------------------------*/
4624 /*--- Some static assertions ---*/
4625 /*------------------------------------------------------------*/
4627 /* The handwritten assembly helpers below have baked-in assumptions
4628 about various constant values. These assertions attempt to make
4629 that a bit safer by checking those values and flagging changes that
4630 would make the assembly invalid. Not perfect but it's better than
4631 nothing. */
4633 STATIC_ASSERT(SM_CHUNKS * 4 == 65536);
4635 STATIC_ASSERT(VA_BITS8_DEFINED == 0xAA);
4636 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);
4638 STATIC_ASSERT(V_BITS32_DEFINED == 0x00000000);
4639 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);
4641 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA);
4642 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);
4644 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000);
4645 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);
4647 STATIC_ASSERT(VA_BITS2_DEFINED == 2);
4648 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);
4650 STATIC_ASSERT(V_BITS8_DEFINED == 0x00);
4651 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
4654 /*------------------------------------------------------------*/
4655 /*--- Functions called directly from generated code: ---*/
4656 /*--- Load/store handlers. ---*/
4657 /*------------------------------------------------------------*/
4659 /* Types: LOADV32, LOADV16, LOADV8 are:
4660 UWord fn ( Addr a )
4661 so they return 32-bits on 32-bit machines and 64-bits on
4662 64-bit machines. Addr has the same size as a host word.
4664 LOADV64 is always ULong fn ( Addr a )
4666 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4667 are a UWord, and for STOREV64 they are a ULong.
4670 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4671 naturally '_sz/8'-aligned, or it exceeds the range covered by the
4672 primary map. This is all very tricky (and important!), so let's
4673 work through the maths by hand (below), *and* assert for these
4674 values at startup. */
4675 #define MASK(_szInBytes) \
4676 ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4678 /* MASK only exists so as to define this macro. */
4679 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
4680 ((_a) & MASK((_szInBits>>3)))
4682 /* On a 32-bit machine:
4684 N_PRIMARY_BITS == 16, so
4685 N_PRIMARY_MAP == 0x10000, so
4686 N_PRIMARY_MAP-1 == 0xFFFF, so
4687 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4689 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4690 = ~ ( 0xFFFF | 0xFFFF0000 )
4691 = ~ 0xFFFF'FFFF
4694 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4695 = ~ ( 0xFFFE | 0xFFFF0000 )
4696 = ~ 0xFFFF'FFFE
4699 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4700 = ~ ( 0xFFFC | 0xFFFF0000 )
4701 = ~ 0xFFFF'FFFC
4704 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4705 = ~ ( 0xFFF8 | 0xFFFF0000 )
4706 = ~ 0xFFFF'FFF8
4709 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4710 precisely when a is not 1/2/4/8-byte aligned. And obviously, for
4711 the 1-byte alignment case, it is always a zero value, since MASK(1)
4712 is zero. All as expected.
4714 On a 64-bit machine, it's more complex, since we're testing
4715 simultaneously for misalignment and for the address being at or
4716 above 64G:
4718 N_PRIMARY_BITS == 20, so
4719 N_PRIMARY_MAP == 0x100000, so
4720 N_PRIMARY_MAP-1 == 0xFFFFF, so
4721 (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4723 MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4724 = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4725 = ~ 0xF'FFFF'FFFF
4726 = 0xFFFF'FFF0'0000'0000
4728 MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4729 = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4730 = ~ 0xF'FFFF'FFFE
4731 = 0xFFFF'FFF0'0000'0001
4733 MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4734 = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4735 = ~ 0xF'FFFF'FFFC
4736 = 0xFFFF'FFF0'0000'0003
4738 MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4739 = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4740 = ~ 0xF'FFFF'FFF8
4741 = 0xFFFF'FFF0'0000'0007
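/* A minimal standalone sketch (not part of the tool) re-checking the
   64-bit arithmetic above.  It assumes a 64-bit host (so UL is 64 bits)
   and N_PRIMARY_MAP == 0x100000, as in the worked example; the names
   used are illustrative only:

      #include <assert.h>

      #define N_PRIMARY_MAP 0x100000UL
      #define MASK(szB) \
         ( ~((0x10000UL-(szB)) | ((N_PRIMARY_MAP-1) << 16)) )

      int main ( void )
      {
         assert(MASK(1) == 0xFFFFFFF000000000UL);
         assert(MASK(2) == 0xFFFFFFF000000001UL);
         assert(MASK(4) == 0xFFFFFFF000000003UL);
         assert(MASK(8) == 0xFFFFFFF000000007UL);
         return 0;
      }

   Memcheck itself asserts for these values at startup, as noted above,
   so this is purely illustrative. */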
4744 /*------------------------------------------------------------*/
4745 /*--- LOADV256 and LOADV128 ---*/
4746 /*------------------------------------------------------------*/
4748 static INLINE
4749 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4750 Addr a, SizeT nBits, Bool isBigEndian )
4752 PROF_EVENT(MCPE_LOADV_128_OR_256);
4754 #ifndef PERF_FAST_LOADV
4755 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4756 return;
4757 #else
4759 UWord sm_off16, vabits16, j;
4760 UWord nBytes = nBits / 8;
4761 UWord nULongs = nBytes / 8;
4762 SecMap* sm;
4764 if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4765 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
4766 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4767 return;
4770 /* Handle common cases quickly: a (and a+8, a+16, etc.) is
4771 suitably aligned, is mapped, and is addressable. */
4772 for (j = 0; j < nULongs; j++) {
4773 sm = get_secmap_for_reading_low(a + 8*j);
4774 sm_off16 = SM_OFF_16(a + 8*j);
4775 vabits16 = sm->vabits16[sm_off16];
4777 // Convert V bits from compact memory form to expanded
4778 // register form.
4779 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4780 res[j] = V_BITS64_DEFINED;
4781 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4782 res[j] = V_BITS64_UNDEFINED;
4783 } else {
4784 /* Slow case: some block of 8 bytes is not all-defined or
4785 all-undefined. */
4786 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
4787 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4788 return;
4791 return;
4793 #endif
4796 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4798 mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4800 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4802 mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4805 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4807 mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4809 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4811 mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4814 /*------------------------------------------------------------*/
4815 /*--- LOADV64 ---*/
4816 /*------------------------------------------------------------*/
4818 static INLINE
4819 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4821 PROF_EVENT(MCPE_LOADV64);
4823 #ifndef PERF_FAST_LOADV
4824 return mc_LOADVn_slow( a, 64, isBigEndian );
4825 #else
4827 UWord sm_off16, vabits16;
4828 SecMap* sm;
4830 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4831 PROF_EVENT(MCPE_LOADV64_SLOW1);
4832 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4835 sm = get_secmap_for_reading_low(a);
4836 sm_off16 = SM_OFF_16(a);
4837 vabits16 = sm->vabits16[sm_off16];
4839 // Handle common case quickly: a is suitably aligned, is mapped, and
4840 // addressable.
4841 // Convert V bits from compact memory form to expanded register form.
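// (Compact form packs 2 VA bits per byte, so vabits16 == VA_BITS16_DEFINED
// (0xAAAA) means all 8 bytes are defined; expanded register form uses one
// V bit per data bit, so the result is all zeroes for defined and all ones
// for undefined.)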
4842 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4843 return V_BITS64_DEFINED;
4844 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4845 return V_BITS64_UNDEFINED;
4846 } else {
4847 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4848 PROF_EVENT(MCPE_LOADV64_SLOW2);
4849 return mc_LOADVn_slow( a, 64, isBigEndian );
4852 #endif
4855 // Generic for all platforms
4856 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4858 return mc_LOADV64(a, True);
4861 // Non-generic assembly for arm32-linux
4862 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4863 && defined(VGP_arm_linux)
4864 __asm__( /* Derived from the 32 bit assembly helper */
4865 ".text \n"
4866 ".align 2 \n"
4867 ".global vgMemCheck_helperc_LOADV64le \n"
4868 ".type vgMemCheck_helperc_LOADV64le, %function \n"
4869 "vgMemCheck_helperc_LOADV64le: \n"
4870 " tst r0, #7 \n"
4871 " movw r3, #:lower16:primary_map \n"
4872 " bne .LLV64LEc4 \n" // if misaligned
4873 " lsr r2, r0, #16 \n"
4874 " movt r3, #:upper16:primary_map \n"
4875 " ldr r2, [r3, r2, lsl #2] \n"
4876 " uxth r1, r0 \n" // r1 is 0-(16)-0 X-(13)-X 000
4877 " movw r3, #0xAAAA \n"
4878 " lsr r1, r1, #2 \n" // r1 is 0-(16)-0 00 X-(13)-X 0
4879 " ldrh r1, [r2, r1] \n"
4880 " cmp r1, r3 \n" // 0xAAAA == VA_BITS16_DEFINED
4881 " bne .LLV64LEc0 \n" // if !all_defined
4882 " mov r1, #0x0 \n" // 0x0 == V_BITS32_DEFINED
4883 " mov r0, #0x0 \n" // 0x0 == V_BITS32_DEFINED
4884 " bx lr \n"
4885 ".LLV64LEc0: \n"
4886 " movw r3, #0x5555 \n"
4887 " cmp r1, r3 \n" // 0x5555 == VA_BITS16_UNDEFINED
4888 " bne .LLV64LEc4 \n" // if !all_undefined
4889 " mov r1, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4890 " mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4891 " bx lr \n"
4892 ".LLV64LEc4: \n"
4893 " push {r4, lr} \n"
4894 " mov r2, #0 \n"
4895 " mov r1, #64 \n"
4896 " bl mc_LOADVn_slow \n"
4897 " pop {r4, pc} \n"
4898 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
4899 ".previous\n"
4902 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4903 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
4904 __asm__(
4905 ".text\n"
4906 ".align 16\n"
4907 ".global vgMemCheck_helperc_LOADV64le\n"
4908 ".type vgMemCheck_helperc_LOADV64le, @function\n"
4909 "vgMemCheck_helperc_LOADV64le:\n"
4910 " test $0x7, %eax\n"
4911 " jne .LLV64LE2\n" /* jump if not aligned */
4912 " mov %eax, %ecx\n"
4913 " movzwl %ax, %edx\n"
4914 " shr $0x10, %ecx\n"
4915 " mov primary_map(,%ecx,4), %ecx\n"
4916 " shr $0x3, %edx\n"
4917 " movzwl (%ecx,%edx,2), %edx\n"
4918 " cmp $0xaaaa, %edx\n"
4919 " jne .LLV64LE1\n" /* jump if not all defined */
4920 " xor %eax, %eax\n" /* return 0 in edx:eax */
4921 " xor %edx, %edx\n"
4922 " ret\n"
4923 ".LLV64LE1:\n"
4924 " cmp $0x5555, %edx\n"
4925 " jne .LLV64LE2\n" /* jump if not all undefined */
4926 " or $0xffffffff, %eax\n" /* else return all bits set in edx:eax */
4927 " or $0xffffffff, %edx\n"
4928 " ret\n"
4929 ".LLV64LE2:\n"
4930 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 64, 0) */
4931 " mov $64, %edx\n"
4932 " jmp mc_LOADVn_slow\n"
4933 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
4934 ".previous\n"
4937 #else
4938 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
4939 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4941 return mc_LOADV64(a, False);
4943 #endif
4945 /*------------------------------------------------------------*/
4946 /*--- STOREV64 ---*/
4947 /*------------------------------------------------------------*/
4949 static INLINE
4950 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4952 PROF_EVENT(MCPE_STOREV64);
4954 #ifndef PERF_FAST_STOREV
4955 // XXX: this slow case seems to be marginally faster than the fast case!
4956 // Investigate further.
4957 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4958 #else
4960 UWord sm_off16, vabits16;
4961 SecMap* sm;
4963 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4964 PROF_EVENT(MCPE_STOREV64_SLOW1);
4965 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4966 return;
4969 sm = get_secmap_for_reading_low(a);
4970 sm_off16 = SM_OFF_16(a);
4971 vabits16 = sm->vabits16[sm_off16];
4973 // To understand the below cleverness, see the extensive comments
4974 // in MC_(helperc_STOREV8).
4975 if (LIKELY(V_BITS64_DEFINED == vbits64)) {
4976 if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
4977 return;
4979 if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
4980 sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
4981 return;
4983 PROF_EVENT(MCPE_STOREV64_SLOW2);
4984 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4985 return;
4987 if (V_BITS64_UNDEFINED == vbits64) {
4988 if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
4989 return;
4991 if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
4992 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
4993 return;
4995 PROF_EVENT(MCPE_STOREV64_SLOW3);
4996 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4997 return;
5000 PROF_EVENT(MCPE_STOREV64_SLOW4);
5001 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5003 #endif
5006 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
5008 mc_STOREV64(a, vbits64, True);
5010 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
5012 mc_STOREV64(a, vbits64, False);
5015 /*------------------------------------------------------------*/
5016 /*--- LOADV32 ---*/
5017 /*------------------------------------------------------------*/
5019 static INLINE
5020 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
5022 PROF_EVENT(MCPE_LOADV32);
5024 #ifndef PERF_FAST_LOADV
5025 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5026 #else
5028 UWord sm_off, vabits8;
5029 SecMap* sm;
5031 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5032 PROF_EVENT(MCPE_LOADV32_SLOW1);
5033 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5036 sm = get_secmap_for_reading_low(a);
5037 sm_off = SM_OFF(a);
5038 vabits8 = sm->vabits8[sm_off];
5040 // Handle common case quickly: a is suitably aligned, is mapped, and the
5041 // entire word32 it lives in is addressable.
5042 // Convert V bits from compact memory form to expanded register form.
5043 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
5044 // Almost certainly not necessary, but be paranoid.
5045 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5046 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
5047 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
5048 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
5049 } else {
5050 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
5051 PROF_EVENT(MCPE_LOADV32_SLOW2);
5052 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5055 #endif
5058 // Generic for all platforms
5059 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
5061 return mc_LOADV32(a, True);
5064 // Non-generic assembly for arm32-linux
5065 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5066 && defined(VGP_arm_linux)
5067 __asm__( /* Derived from NCode template */
5068 ".text \n"
5069 ".align 2 \n"
5070 ".global vgMemCheck_helperc_LOADV32le \n"
5071 ".type vgMemCheck_helperc_LOADV32le, %function \n"
5072 "vgMemCheck_helperc_LOADV32le: \n"
5073 " tst r0, #3 \n" // 1
5074 " movw r3, #:lower16:primary_map \n" // 1
5075 " bne .LLV32LEc4 \n" // 2 if misaligned
5076 " lsr r2, r0, #16 \n" // 3
5077 " movt r3, #:upper16:primary_map \n" // 3
5078 " ldr r2, [r3, r2, lsl #2] \n" // 4
5079 " uxth r1, r0 \n" // 4
5080 " ldrb r1, [r2, r1, lsr #2] \n" // 5
5081 " cmp r1, #0xAA \n" // 6 0xAA == VA_BITS8_DEFINED
5082 " bne .LLV32LEc0 \n" // 7 if !all_defined
5083 " mov r0, #0x0 \n" // 8 0x0 == V_BITS32_DEFINED
5084 " bx lr \n" // 9
5085 ".LLV32LEc0: \n"
5086 " cmp r1, #0x55 \n" // 0x55 == VA_BITS8_UNDEFINED
5087 " bne .LLV32LEc4 \n" // if !all_undefined
5088 " mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
5089 " bx lr \n"
5090 ".LLV32LEc4: \n"
5091 " push {r4, lr} \n"
5092 " mov r2, #0 \n"
5093 " mov r1, #32 \n"
5094 " bl mc_LOADVn_slow \n"
5095 " pop {r4, pc} \n"
5096 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
5097 ".previous\n"
5100 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5101 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5102 __asm__(
5103 ".text\n"
5104 ".align 16\n"
5105 ".global vgMemCheck_helperc_LOADV32le\n"
5106 ".type vgMemCheck_helperc_LOADV32le, @function\n"
5107 "vgMemCheck_helperc_LOADV32le:\n"
5108 " test $0x3, %eax\n"
5109 " jnz .LLV32LE2\n" /* jump if misaligned */
5110 " mov %eax, %edx\n"
5111 " shr $16, %edx\n"
5112 " mov primary_map(,%edx,4), %ecx\n"
5113 " movzwl %ax, %edx\n"
5114 " shr $2, %edx\n"
5115 " movzbl (%ecx,%edx,1), %edx\n"
5116 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5117 " jne .LLV32LE1\n" /* jump if not completely defined */
5118 " xor %eax, %eax\n" /* else return V_BITS32_DEFINED */
5119 " ret\n"
5120 ".LLV32LE1:\n"
5121 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5122 " jne .LLV32LE2\n" /* jump if not completely undefined */
5123 " or $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */
5124 " ret\n"
5125 ".LLV32LE2:\n"
5126 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 32, 0) */
5127 " mov $32, %edx\n"
5128 " jmp mc_LOADVn_slow\n"
5129 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
5130 ".previous\n"
5133 #else
5134 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5135 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
5137 return mc_LOADV32(a, False);
5139 #endif
5141 /*------------------------------------------------------------*/
5142 /*--- STOREV32 ---*/
5143 /*------------------------------------------------------------*/
5145 static INLINE
5146 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
5148 PROF_EVENT(MCPE_STOREV32);
5150 #ifndef PERF_FAST_STOREV
5151 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5152 #else
5154 UWord sm_off, vabits8;
5155 SecMap* sm;
5157 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5158 PROF_EVENT(MCPE_STOREV32_SLOW1);
5159 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5160 return;
5163 sm = get_secmap_for_reading_low(a);
5164 sm_off = SM_OFF(a);
5165 vabits8 = sm->vabits8[sm_off];
5167 // To understand the below cleverness, see the extensive comments
5168 // in MC_(helperc_STOREV8).
5169 if (LIKELY(V_BITS32_DEFINED == vbits32)) {
5170 if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
5171 return;
5173 if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
5174 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
5175 return;
5177 PROF_EVENT(MCPE_STOREV32_SLOW2);
5178 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5179 return;
5181 if (V_BITS32_UNDEFINED == vbits32) {
5182 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
5183 return;
5185 if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
5186 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
5187 return;
5189 PROF_EVENT(MCPE_STOREV32_SLOW3);
5190 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5191 return;
5194 PROF_EVENT(MCPE_STOREV32_SLOW4);
5195 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5197 #endif
5200 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
5202 mc_STOREV32(a, vbits32, True);
5204 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
5206 mc_STOREV32(a, vbits32, False);
5209 /*------------------------------------------------------------*/
5210 /*--- LOADV16 ---*/
5211 /*------------------------------------------------------------*/
5213 static INLINE
5214 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
5216 PROF_EVENT(MCPE_LOADV16);
5218 #ifndef PERF_FAST_LOADV
5219 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5220 #else
5222 UWord sm_off, vabits8;
5223 SecMap* sm;
5225 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5226 PROF_EVENT(MCPE_LOADV16_SLOW1);
5227 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5230 sm = get_secmap_for_reading_low(a);
5231 sm_off = SM_OFF(a);
5232 vabits8 = sm->vabits8[sm_off];
5233 // Handle common case quickly: a is suitably aligned, is mapped, and is
5234 // addressable.
5235 // Convert V bits from compact memory form to expanded register form
5236 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; }
5237 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
5238 else {
5239 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5240 // the two sub-bytes.
5241 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
5242 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
5243 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
5244 else {
5245 /* Slow case: the two bytes are not all-defined or all-undefined. */
5246 PROF_EVENT(MCPE_LOADV16_SLOW2);
5247 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5251 #endif
5254 // Generic for all platforms
5255 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
5257 return mc_LOADV16(a, True);
5260 // Non-generic assembly for arm32-linux
5261 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5262 && defined(VGP_arm_linux)
5263 __asm__( /* Derived from NCode template */
5264 ".text \n"
5265 ".align 2 \n"
5266 ".global vgMemCheck_helperc_LOADV16le \n"
5267 ".type vgMemCheck_helperc_LOADV16le, %function \n"
5268 "vgMemCheck_helperc_LOADV16le: \n" //
5269 " tst r0, #1 \n" //
5270 " bne .LLV16LEc12 \n" // if misaligned
5271 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5272 " movw r3, #:lower16:primary_map \n" //
5273 " uxth r1, r0 \n" // r1 = sec-map-offB
5274 " movt r3, #:upper16:primary_map \n" //
5275 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5276 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5277 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5278 " bne .LLV16LEc0 \n" // no, goto .LLV16LEc0
5279 ".LLV16LEh9: \n" //
5280 " mov r0, #0xFFFFFFFF \n" //
5281 " lsl r0, r0, #16 \n" // V_BITS16_DEFINED | top16safe
5282 " bx lr \n" //
5283 ".LLV16LEc0: \n" //
5284 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5285 " bne .LLV16LEc4 \n" //
5286 ".LLV16LEc2: \n" //
5287 " mov r0, #0xFFFFFFFF \n" // V_BITS16_UNDEFINED | top16safe
5288 " bx lr \n" //
5289 ".LLV16LEc4: \n" //
5290 // r1 holds sec-map-VABITS8. r0 holds the address and is 2-aligned.
5291 // Extract the relevant 4 bits and inspect.
5292 " and r2, r0, #2 \n" // addr & 2
5293 " add r2, r2, r2 \n" // 2 * (addr & 2)
5294 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 2))
5295 " and r1, r1, #15 \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
5297 " cmp r1, #0xA \n" // VA_BITS4_DEFINED
5298 " beq .LLV16LEh9 \n" //
5300 " cmp r1, #0x5 \n" // VA_BITS4_UNDEFINED
5301 " beq .LLV16LEc2 \n" //
5303 ".LLV16LEc12: \n" //
5304 " push {r4, lr} \n" //
5305 " mov r2, #0 \n" //
5306 " mov r1, #16 \n" //
5307 " bl mc_LOADVn_slow \n" //
5308 " pop {r4, pc} \n" //
5309 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5310 ".previous\n"
5313 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5314 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5315 __asm__(
5316 ".text\n"
5317 ".align 16\n"
5318 ".global vgMemCheck_helperc_LOADV16le\n"
5319 ".type vgMemCheck_helperc_LOADV16le, @function\n"
5320 "vgMemCheck_helperc_LOADV16le:\n"
5321 " test $0x1, %eax\n"
5322 " jne .LLV16LE5\n" /* jump if not aligned */
5323 " mov %eax, %edx\n"
5324 " shr $0x10, %edx\n"
5325 " mov primary_map(,%edx,4), %ecx\n"
5326 " movzwl %ax, %edx\n"
5327 " shr $0x2, %edx\n"
5328 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5329 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5330 " jne .LLV16LE2\n" /* jump if not all 32bits defined */
5331 ".LLV16LE1:\n"
5332 " mov $0xffff0000,%eax\n" /* V_BITS16_DEFINED | top16safe */
5333 " ret\n"
5334 ".LLV16LE2:\n"
5335 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5336 " jne .LLV16LE4\n" /* jump if not all 32bits undefined */
5337 ".LLV16LE3:\n"
5338 " or $0xffffffff,%eax\n" /* V_BITS16_UNDEFINED | top16safe */
5339 " ret\n"
5340 ".LLV16LE4:\n"
5341 " mov %eax, %ecx\n"
5342 " and $0x2, %ecx\n"
5343 " add %ecx, %ecx\n"
5344 " sar %cl, %edx\n"
5345 " and $0xf, %edx\n"
5346 " cmp $0xa, %edx\n"
5347 " je .LLV16LE1\n" /* jump if all 16bits are defined */
5348 " cmp $0x5, %edx\n"
5349 " je .LLV16LE3\n" /* jump if all 16bits are undefined */
5350 ".LLV16LE5:\n"
5351 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 16, 0) */
5352 " mov $16, %edx\n"
5353 " jmp mc_LOADVn_slow\n"
5354 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5355 ".previous\n"
5358 #else
5359 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5360 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
5362 return mc_LOADV16(a, False);
5364 #endif
5366 /*------------------------------------------------------------*/
5367 /*--- STOREV16 ---*/
5368 /*------------------------------------------------------------*/
5370 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
5371 static INLINE
5372 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
5374 UInt shift;
5375 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
5376 shift = (a & 2) << 1; // shift by 0 or 4
5377 vabits8 >>= shift; // shift the four bits to the bottom
5378 // check 2 x vabits2 != VA_BITS2_NOACCESS
5379 return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
5380 && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
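/* Illustrative walk-through, assuming the usual encoding in which
   VA_BITS2_NOACCESS == 0 and VA_BITS2_DEFINED == 2: for vabits8 == 0x0A
   and a 2-aligned address with (a & 2) == 0, shift is 0 and the low
   nibble 0xA is inspected -- both vabits2 fields are VA_BITS2_DEFINED,
   so the result is True.  With (a & 2) == 2, shift is 4 and the high
   nibble 0x0 is inspected -- both fields are VA_BITS2_NOACCESS, so the
   result is False. */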
5383 static INLINE
5384 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
5386 PROF_EVENT(MCPE_STOREV16);
5388 #ifndef PERF_FAST_STOREV
5389 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5390 #else
5392 UWord sm_off, vabits8;
5393 SecMap* sm;
5395 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5396 PROF_EVENT(MCPE_STOREV16_SLOW1);
5397 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5398 return;
5401 sm = get_secmap_for_reading_low(a);
5402 sm_off = SM_OFF(a);
5403 vabits8 = sm->vabits8[sm_off];
5405 // To understand the below cleverness, see the extensive comments
5406 // in MC_(helperc_STOREV8).
5407 if (LIKELY(V_BITS16_DEFINED == vbits16)) {
5408 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5409 return;
5411 if (!is_distinguished_sm(sm)
5412 && accessible_vabits4_in_vabits8(a, vabits8)) {
5413 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
5414 &(sm->vabits8[sm_off]) );
5415 return;
5417 PROF_EVENT(MCPE_STOREV16_SLOW2);
5418 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
return;
5420 if (V_BITS16_UNDEFINED == vbits16) {
5421 if (vabits8 == VA_BITS8_UNDEFINED) {
5422 return;
5424 if (!is_distinguished_sm(sm)
5425 && accessible_vabits4_in_vabits8(a, vabits8)) {
5426 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
5427 &(sm->vabits8[sm_off]) );
5428 return;
5430 PROF_EVENT(MCPE_STOREV16_SLOW3);
5431 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5432 return;
5435 PROF_EVENT(MCPE_STOREV16_SLOW4);
5436 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5438 #endif
5442 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
5444 mc_STOREV16(a, vbits16, True);
5446 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
5448 mc_STOREV16(a, vbits16, False);
5451 /*------------------------------------------------------------*/
5452 /*--- LOADV8 ---*/
5453 /*------------------------------------------------------------*/
5455 /* Note: endianness is irrelevant for size == 1 */
5457 // Non-generic assembly for arm32-linux
5458 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5459 && defined(VGP_arm_linux)
5460 __asm__( /* Derived from NCode template */
5461 ".text \n"
5462 ".align 2 \n"
5463 ".global vgMemCheck_helperc_LOADV8 \n"
5464 ".type vgMemCheck_helperc_LOADV8, %function \n"
5465 "vgMemCheck_helperc_LOADV8: \n" //
5466 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5467 " movw r3, #:lower16:primary_map \n" //
5468 " uxth r1, r0 \n" // r1 = sec-map-offB
5469 " movt r3, #:upper16:primary_map \n" //
5470 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5471 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5472 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5473 " bne .LLV8c0 \n" // no, goto .LLV8c0
5474 ".LLV8h9: \n" //
5475 " mov r0, #0xFFFFFF00 \n" // V_BITS8_DEFINED | top24safe
5476 " bx lr \n" //
5477 ".LLV8c0: \n" //
5478 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5479 " bne .LLV8c4 \n" //
5480 ".LLV8c2: \n" //
5481 " mov r0, #0xFFFFFFFF \n" // V_BITS8_UNDEFINED | top24safe
5482 " bx lr \n" //
5483 ".LLV8c4: \n" //
5484 // r1 holds sec-map-VABITS8
5485 // r0 holds the address. Extract the relevant 2 bits and inspect.
5486 " and r2, r0, #3 \n" // addr & 3
5487 " add r2, r2, r2 \n" // 2 * (addr & 3)
5488 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5489 " and r1, r1, #3 \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5491 " cmp r1, #2 \n" // VA_BITS2_DEFINED
5492 " beq .LLV8h9 \n" //
5494 " cmp r1, #1 \n" // VA_BITS2_UNDEFINED
5495 " beq .LLV8c2 \n" //
5497 " push {r4, lr} \n" //
5498 " mov r2, #0 \n" //
5499 " mov r1, #8 \n" //
5500 " bl mc_LOADVn_slow \n" //
5501 " pop {r4, pc} \n" //
5502 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5503 ".previous\n"
5506 /* Non-generic assembly for x86-linux */
5507 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5508 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5509 __asm__(
5510 ".text\n"
5511 ".align 16\n"
5512 ".global vgMemCheck_helperc_LOADV8\n"
5513 ".type vgMemCheck_helperc_LOADV8, @function\n"
5514 "vgMemCheck_helperc_LOADV8:\n"
5515 " mov %eax, %edx\n"
5516 " shr $0x10, %edx\n"
5517 " mov primary_map(,%edx,4), %ecx\n"
5518 " movzwl %ax, %edx\n"
5519 " shr $0x2, %edx\n"
5520 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5521 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED? */
5522 " jne .LLV8LE2\n" /* jump if not defined */
5523 ".LLV8LE1:\n"
5524 " mov $0xffffff00, %eax\n" /* V_BITS8_DEFINED | top24safe */
5525 " ret\n"
5526 ".LLV8LE2:\n"
5527 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5528 " jne .LLV8LE4\n" /* jump if not all 32bits are undefined */
5529 ".LLV8LE3:\n"
5530 " or $0xffffffff, %eax\n" /* V_BITS8_UNDEFINED | top24safe */
5531 " ret\n"
5532 ".LLV8LE4:\n"
5533 " mov %eax, %ecx\n"
5534 " and $0x3, %ecx\n"
5535 " add %ecx, %ecx\n"
5536 " sar %cl, %edx\n"
5537 " and $0x3, %edx\n"
5538 " cmp $0x2, %edx\n"
5539 " je .LLV8LE1\n" /* jump if all 8bits are defined */
5540 " cmp $0x1, %edx\n"
5541 " je .LLV8LE3\n" /* jump if all 8bits are undefined */
5542 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 8, 0) */
5543 " mov $0x8, %edx\n"
5544 " jmp mc_LOADVn_slow\n"
5545 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5546 ".previous\n"
5549 #else
5550 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5551 VG_REGPARM(1)
5552 UWord MC_(helperc_LOADV8) ( Addr a )
5554 PROF_EVENT(MCPE_LOADV8);
5556 #ifndef PERF_FAST_LOADV
5557 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5558 #else
5560 UWord sm_off, vabits8;
5561 SecMap* sm;
5563 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5564 PROF_EVENT(MCPE_LOADV8_SLOW1);
5565 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5568 sm = get_secmap_for_reading_low(a);
5569 sm_off = SM_OFF(a);
5570 vabits8 = sm->vabits8[sm_off];
5571 // Convert V bits from compact memory form to expanded register form
5572 // Handle common case quickly: a is mapped, and the entire
5573 // word32 it lives in is addressable.
5574 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; }
5575 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
5576 else {
5577 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5578 // the single byte.
5579 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
5580 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; }
5581 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
5582 else {
5583 /* Slow case: the byte is not all-defined or all-undefined. */
5584 PROF_EVENT(MCPE_LOADV8_SLOW2);
5585 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5589 #endif
5591 #endif
5593 /*------------------------------------------------------------*/
5594 /*--- STOREV8 ---*/
5595 /*------------------------------------------------------------*/
5597 VG_REGPARM(2)
5598 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
5600 PROF_EVENT(MCPE_STOREV8);
5602 #ifndef PERF_FAST_STOREV
5603 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5604 #else
5606 UWord sm_off, vabits8;
5607 SecMap* sm;
5609 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5610 PROF_EVENT(MCPE_STOREV8_SLOW1);
5611 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5612 return;
5615 sm = get_secmap_for_reading_low(a);
5616 sm_off = SM_OFF(a);
5617 vabits8 = sm->vabits8[sm_off];
5619 // Clevernesses to speed up storing V bits.
5620 // The 64/32/16 bit cases also have similar clevernesses, but they
5621 // work a little differently from the code below.
5623 // Cleverness 1: sometimes we don't have to write the shadow memory at
5624 // all, if we can tell that what we want to write is the same as what is
5625 // already there. These cases are marked below as "defined on defined" and
5626 // "undefined on undefined".
5628 // Cleverness 2:
5629 // We also avoid calling mc_STOREVn_slow if the V bits can be written
5630 // directly in the secondary map.  V bits can be directly written
5631 // if 4 conditions are respected:
5632 // * The address for which V bits are written is naturally aligned
5633 // on 1 byte for STOREV8 (this is always true)
5634 // on 2 bytes for STOREV16
5635 // on 4 bytes for STOREV32
5636 // on 8 bytes for STOREV64.
5637 // * V bits being written are either fully defined or fully undefined.
5638 // (for partially defined V bits, V bits cannot be directly written,
5639 // as the secondary vbits table must be maintained).
5640 // * the secmap is not distinguished (distinguished maps cannot be
5641 // modified).
5642 // * the memory corresponding to the V bits being written is
5643 // accessible (if one or more bytes are not accessible,
5644 // we must call mc_STOREVn_slow in order to report accessibility
5645 // errors).
5646 // Note that for STOREV32 and STOREV64, it is too expensive
5647 // to verify the accessibility of each byte for the benefit it
5648 // brings. Instead, a quicker check is done by comparing to
5649 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
5650 // but misses some opportunities for direct modification.
5651 // Checking the accessibility of each byte was measured with
5652 // STOREV32 on the perf tests and slowed down all of them.
5653 // The cases corresponding to cleverness 2 are marked below as
5654 // "direct mod".
5655 if (LIKELY(V_BITS8_DEFINED == vbits8)) {
5656 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5657 return; // defined on defined
5659 if (!is_distinguished_sm(sm)
5660 && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
5661 // direct mod
5662 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
5663 &(sm->vabits8[sm_off]) );
5664 return;
5666 PROF_EVENT(MCPE_STOREV8_SLOW2);
5667 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5668 return;
5670 if (V_BITS8_UNDEFINED == vbits8) {
5671 if (vabits8 == VA_BITS8_UNDEFINED) {
5672 return; // undefined on undefined
5674 if (!is_distinguished_sm(sm)
5675 && (VA_BITS2_NOACCESS
5676 != extract_vabits2_from_vabits8(a, vabits8))) {
5677 // direct mod
5678 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
5679 &(sm->vabits8[sm_off]) );
5680 return;
5682 PROF_EVENT(MCPE_STOREV8_SLOW3);
5683 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5684 return;
5687 // Partially defined word
5688 PROF_EVENT(MCPE_STOREV8_SLOW4);
5689 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5691 #endif
5695 /*------------------------------------------------------------*/
5696 /*--- Functions called directly from generated code: ---*/
5697 /*--- Value-check failure handlers. ---*/
5698 /*------------------------------------------------------------*/
5700 /* Call these ones when an origin is available ... */
5701 VG_REGPARM(1)
5702 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
5703 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
5706 VG_REGPARM(1)
5707 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
5708 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
5711 VG_REGPARM(1)
5712 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
5713 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
5716 VG_REGPARM(1)
5717 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
5718 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
5721 VG_REGPARM(2)
5722 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
5723 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
5726 /* ... and these when an origin isn't available. */
5728 VG_REGPARM(0)
5729 void MC_(helperc_value_check0_fail_no_o) ( void ) {
5730 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
5733 VG_REGPARM(0)
5734 void MC_(helperc_value_check1_fail_no_o) ( void ) {
5735 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
5738 VG_REGPARM(0)
5739 void MC_(helperc_value_check4_fail_no_o) ( void ) {
5740 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
5743 VG_REGPARM(0)
5744 void MC_(helperc_value_check8_fail_no_o) ( void ) {
5745 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
5748 VG_REGPARM(1)
5749 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
5750 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
5754 /*------------------------------------------------------------*/
5755 /*--- Metadata get/set functions, for client requests. ---*/
5756 /*------------------------------------------------------------*/
5758 // Nb: this expands the V+A bits out into register-form V bits, even though
5759 // they're in memory. This is for backward compatibility, and because it's
5760 // probably what the user wants.
5762 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5763 error [no longer used], 3 == addressing error. */
5764 /* Nb: We used to issue various definedness/addressability errors from here,
5765 but we took them out because they ranged from not-very-helpful to
5766 downright annoying, and they complicated the error data structures. */
5767 static Int mc_get_or_set_vbits_for_client (
5768 Addr a,
5769 Addr vbits,
5770 SizeT szB,
5771 Bool setting, /* True <=> set vbits, False <=> get vbits */
5772 Bool is_client_request /* True <=> real user request
5773 False <=> internal call from gdbserver */
5776 SizeT i;
5777 Bool ok;
5778 UChar vbits8;
5780 /* Check that the arrays are addressable before doing any getting/setting.
5781 vbits is checked only for a real user request. */
5782 for (i = 0; i < szB; i++) {
5783 if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
5784 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
5785 return 3;
5789 /* Do the copy */
5790 if (setting) {
5791 /* setting */
5792 for (i = 0; i < szB; i++) {
5793 ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
5794 tl_assert(ok);
5796 } else {
5797 /* getting */
5798 for (i = 0; i < szB; i++) {
5799 ok = get_vbits8(a + i, &vbits8);
5800 tl_assert(ok);
5801 ((UChar*)vbits)[i] = vbits8;
5803 if (is_client_request)
5804 // The bytes in vbits[] have now been set, so mark them as such.
5805 MC_(make_mem_defined)(vbits, szB);
5808 return 1;
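/* Client-side sketch of how this is reached through the client requests
   in memcheck.h (illustrative only; it assumes the usual
   VALGRIND_GET_VBITS / VALGRIND_SET_VBITS macros):

      unsigned char buf[8], vbits[8];
      int r = VALGRIND_GET_VBITS(buf, vbits, 8);  // 1 == OK,
                                                  // 3 == addressing error
      vbits[0] = 0xFF;                            // byte 0: all bits undefined
      r = VALGRIND_SET_VBITS(buf, vbits, 8);

   Each vbits[] byte is in register form, as handled by get_vbits8 /
   set_vbits8 above: 0x00 == fully defined, 0xFF == fully undefined. */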
5812 /*------------------------------------------------------------*/
5813 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
5814 /*------------------------------------------------------------*/
5816 /* For the memory leak detector, say whether an entire 64k chunk of
5817 address space is possibly in use, or not. If in doubt return
5818 True.
5820 Bool MC_(is_within_valid_secondary) ( Addr a )
5822 SecMap* sm = maybe_get_secmap_for ( a );
5823 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
5824 /* Definitely not in use. */
5825 return False;
5826 } else {
5827 return True;
5832 /* For the memory leak detector, say whether or not a given word
5833 address is to be regarded as valid. */
5834 Bool MC_(is_valid_aligned_word) ( Addr a )
5836 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5837 tl_assert(VG_IS_WORD_ALIGNED(a));
5838 if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5839 return False;
5840 if (sizeof(UWord) == 8) {
5841 if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5842 return False;
5844 if (UNLIKELY(MC_(in_ignored_range)(a)))
5845 return False;
5846 else
5847 return True;
5851 /*------------------------------------------------------------*/
5852 /*--- Initialisation ---*/
5853 /*------------------------------------------------------------*/
5855 static void init_shadow_memory ( void )
5857 Int i;
5858 SecMap* sm;
5860 tl_assert(V_BIT_UNDEFINED == 1);
5861 tl_assert(V_BIT_DEFINED == 0);
5862 tl_assert(V_BITS8_UNDEFINED == 0xFF);
5863 tl_assert(V_BITS8_DEFINED == 0);
5865 /* Build the 3 distinguished secondaries */
5866 sm = &sm_distinguished[SM_DIST_NOACCESS];
5867 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5869 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5870 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5872 sm = &sm_distinguished[SM_DIST_DEFINED];
5873 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5875 /* Set up the primary map. */
5876 /* These entries gradually get overwritten as the used address
5877 space expands. */
5878 for (i = 0; i < N_PRIMARY_MAP; i++)
5879 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5881 /* Auxiliary primary maps */
5882 init_auxmap_L1_L2();
5884 /* auxmap_size = auxmap_used = 0;
5885 no ... these are statically initialised */
5887 /* Secondary V bit table */
5888 secVBitTable = createSecVBitTable();
5892 /*------------------------------------------------------------*/
5893 /*--- Sanity check machinery (permanently engaged) ---*/
5894 /*------------------------------------------------------------*/
5896 static Bool mc_cheap_sanity_check ( void )
5898 n_sanity_cheap++;
5899 PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
5900 /* Check for sane operating level */
5901 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5902 return False;
5903 /* nothing else useful we can rapidly check */
5904 return True;
5907 static Bool mc_expensive_sanity_check ( void )
5909 Int i;
5910 Word n_secmaps_found;
5911 SecMap* sm;
5912 const HChar* errmsg;
5913 Bool bad = False;
5915 if (0) VG_(printf)("expensive sanity check\n");
5916 if (0) return True;
5918 n_sanity_expensive++;
5919 PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);
5921 /* Check for sane operating level */
5922 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5923 return False;
5925 /* Check that the 3 distinguished SMs are still as they should be. */
5927 /* Check noaccess DSM. */
5928 sm = &sm_distinguished[SM_DIST_NOACCESS];
5929 for (i = 0; i < SM_CHUNKS; i++)
5930 if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5931 bad = True;
5933 /* Check undefined DSM. */
5934 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5935 for (i = 0; i < SM_CHUNKS; i++)
5936 if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5937 bad = True;
5939 /* Check defined DSM. */
5940 sm = &sm_distinguished[SM_DIST_DEFINED];
5941 for (i = 0; i < SM_CHUNKS; i++)
5942 if (sm->vabits8[i] != VA_BITS8_DEFINED)
5943 bad = True;
5945 if (bad) {
5946 VG_(printf)("memcheck expensive sanity: "
5947 "distinguished_secondaries have changed\n");
5948 return False;
5951 /* If we're not checking for undefined value errors, the secondary V bit
5952 * table should be empty. */
5953 if (MC_(clo_mc_level) == 1) {
5954 if (0 != VG_(OSetGen_Size)(secVBitTable))
5955 return False;
5958 /* check the auxiliary maps, very thoroughly */
5959 n_secmaps_found = 0;
5960 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
5961 if (errmsg) {
5962 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
5963 return False;
5966 /* n_secmaps_found is now the number referred to by the auxiliary
5967 primary map. Now add on the ones referred to by the main
5968 primary map. */
5969 for (i = 0; i < N_PRIMARY_MAP; i++) {
5970 if (primary_map[i] == NULL) {
5971 bad = True;
5972 } else {
5973 if (!is_distinguished_sm(primary_map[i]))
5974 n_secmaps_found++;
5978 /* check that the number of secmaps issued matches the number that
5979 are reachable (iow, no secmap leaks) */
5980 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
5981 bad = True;
5983 if (bad) {
5984 VG_(printf)("memcheck expensive sanity: "
5985 "apparent secmap leakage\n");
5986 return False;
5989 if (bad) {
5990 VG_(printf)("memcheck expensive sanity: "
5991 "auxmap covers wrong address space\n");
5992 return False;
5995 /* there is only one pointer to each secmap (expensive) */
5997 return True;
6000 /*------------------------------------------------------------*/
6001 /*--- Command line args ---*/
6002 /*------------------------------------------------------------*/
6004 /* 31 Aug 2015: Vectorised code is now so widespread that
6005 --partial-loads-ok needs to be enabled by default on all platforms.
6006 Not doing so causes lots of false errors. */
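/* (Background, summarised from the user manual: with --partial-loads-ok=yes,
   a naturally aligned word-sized load that touches some addressable and some
   unaddressable bytes does not raise an address error; instead the V bits
   for the out-of-range bytes are marked undefined.  Vectorised string and
   memory routines routinely make such overhanging loads.) */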
6007 Bool MC_(clo_partial_loads_ok) = True;
6008 Long MC_(clo_freelist_vol) = 20*1000*1000LL;
6009 Long MC_(clo_freelist_big_blocks) = 1*1000*1000LL;
6010 LeakCheckMode MC_(clo_leak_check) = LC_Summary;
6011 VgRes MC_(clo_leak_resolution) = Vg_HighRes;
6012 UInt MC_(clo_show_leak_kinds) = R2S(Possible) | R2S(Unreached);
6013 UInt MC_(clo_error_for_leak_kinds) = R2S(Possible) | R2S(Unreached);
6014 UInt MC_(clo_leak_check_heuristics) = H2S(LchStdString)
6015 | H2S(LchLength64)
6016 | H2S(LchNewArray)
6017 | H2S(LchMultipleInheritance);
6018 Bool MC_(clo_xtree_leak) = False;
6019 const HChar* MC_(clo_xtree_leak_file) = "xtleak.kcg.%p";
6020 Bool MC_(clo_workaround_gcc296_bugs) = False;
6021 Int MC_(clo_malloc_fill) = -1;
6022 Int MC_(clo_free_fill) = -1;
6023 KeepStacktraces MC_(clo_keep_stacktraces) = KS_alloc_and_free;
6024 Int MC_(clo_mc_level) = 2;
6025 Bool MC_(clo_show_mismatched_frees) = True;
6026 Bool MC_(clo_expensive_definedness_checks) = False;
6027 Bool MC_(clo_ignore_range_below_sp) = False;
6028 UInt MC_(clo_ignore_range_below_sp__first_offset) = 0;
6029 UInt MC_(clo_ignore_range_below_sp__last_offset) = 0;
6031 static const HChar * MC_(parse_leak_heuristics_tokens) =
6032 "-,stdstring,length64,newarray,multipleinheritance";
6033 /* The first heuristic value (LchNone) has no keyword, as this is
6034 a fake heuristic used to collect the blocks found without any
6035 heuristic. */
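/* Typical invocations exercising the options handled below (illustrative;
   "./prog" is a placeholder):

      valgrind --tool=memcheck --leak-check=full --show-leak-kinds=all ./prog
      valgrind --tool=memcheck --track-origins=yes ./prog

   Note that --track-origins=yes raises MC_(clo_mc_level) to 3 and is
   rejected when combined with --undef-value-errors=no (see bad_level
   below). */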
6037 static Bool mc_process_cmd_line_options(const HChar* arg)
6039 const HChar* tmp_str;
6040 Int tmp_show;
6042 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6044 /* Set MC_(clo_mc_level):
6045 1 = A bit tracking only
6046 2 = A and V bit tracking, but no V bit origins
6047 3 = A and V bit tracking, and V bit origins
6049 Do this by inspecting --undef-value-errors= and
6050 --track-origins=. Reject the case --undef-value-errors=no
6051 --track-origins=yes as meaningless.
6053 if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
6054 if (MC_(clo_mc_level) == 3) {
6055 goto bad_level;
6056 } else {
6057 MC_(clo_mc_level) = 1;
6058 return True;
6061 if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
6062 if (MC_(clo_mc_level) == 1)
6063 MC_(clo_mc_level) = 2;
6064 return True;
6066 if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
6067 if (MC_(clo_mc_level) == 3)
6068 MC_(clo_mc_level) = 2;
6069 return True;
6071 if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
6072 if (MC_(clo_mc_level) == 1) {
6073 goto bad_level;
6074 } else {
6075 MC_(clo_mc_level) = 3;
6076 return True;
6080 if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
6081 else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
6082 MC_(parse_leak_kinds_tokens),
6083 MC_(clo_error_for_leak_kinds)) {}
6084 else if VG_USET_CLO(arg, "--show-leak-kinds",
6085 MC_(parse_leak_kinds_tokens),
6086 MC_(clo_show_leak_kinds)) {}
6087 else if VG_USET_CLO(arg, "--leak-check-heuristics",
6088 MC_(parse_leak_heuristics_tokens),
6089 MC_(clo_leak_check_heuristics)) {}
6090 else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
6091 if (tmp_show) {
6092 MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
6093 } else {
6094 MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
6097 else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
6098 if (tmp_show) {
6099 MC_(clo_show_leak_kinds) |= R2S(Possible);
6100 } else {
6101 MC_(clo_show_leak_kinds) &= ~R2S(Possible);
6104 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
6105 MC_(clo_workaround_gcc296_bugs)) {}
6107 else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol),
6108 0, 10*1000*1000*1000LL) {}
6110 else if VG_BINT_CLO(arg, "--freelist-big-blocks",
6111 MC_(clo_freelist_big_blocks),
6112 0, 10*1000*1000*1000LL) {}
6114 else if VG_XACT_CLO(arg, "--leak-check=no",
6115 MC_(clo_leak_check), LC_Off) {}
6116 else if VG_XACT_CLO(arg, "--leak-check=summary",
6117 MC_(clo_leak_check), LC_Summary) {}
6118 else if VG_XACT_CLO(arg, "--leak-check=yes",
6119 MC_(clo_leak_check), LC_Full) {}
6120 else if VG_XACT_CLO(arg, "--leak-check=full",
6121 MC_(clo_leak_check), LC_Full) {}
6123 else if VG_XACT_CLO(arg, "--leak-resolution=low",
6124 MC_(clo_leak_resolution), Vg_LowRes) {}
6125 else if VG_XACT_CLO(arg, "--leak-resolution=med",
6126 MC_(clo_leak_resolution), Vg_MedRes) {}
6127 else if VG_XACT_CLO(arg, "--leak-resolution=high",
6128 MC_(clo_leak_resolution), Vg_HighRes) {}
6130 else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
6131 Bool ok = parse_ignore_ranges(tmp_str);
6132 if (!ok) {
6133 VG_(message)(Vg_DebugMsg,
6134 "ERROR: --ignore-ranges: "
6135 "invalid syntax, or end <= start in range\n");
6136 return False;
6138 if (gIgnoredAddressRanges) {
6139 UInt i;
6140 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
6141 UWord val = IAR_INVALID;
6142 UWord key_min = ~(UWord)0;
6143 UWord key_max = (UWord)0;
6144 VG_(indexRangeMap)( &key_min, &key_max, &val,
6145 gIgnoredAddressRanges, i );
6146 tl_assert(key_min <= key_max);
6147 UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
6148 if (key_max - key_min > limit && val == IAR_CommandLine) {
6149 VG_(message)(Vg_DebugMsg,
6150 "ERROR: --ignore-ranges: suspiciously large range:\n");
6151 VG_(message)(Vg_DebugMsg,
6152 " 0x%lx-0x%lx (size %lu)\n", key_min, key_max,
6153 key_max - key_min + 1);
6154 return False;
6160 else if VG_STR_CLO(arg, "--ignore-range-below-sp", tmp_str) {
6161 /* This seems at first a bit weird, but: in order to imply
6162 a non-wrapped-around address range, the first offset needs to be
6163 larger than the second one. For example
6164 --ignore-range-below-sp=8192-8189
6165 would cause accesses in the range [SP-8192, SP-8189] to be
6166 ignored. */
6167 UInt offs1 = 0, offs2 = 0;
6168 Bool ok = parse_UInt_pair(&tmp_str, &offs1, &offs2);
6169 // Ensure we used all the text after the '=' sign.
6170 if (ok && *tmp_str != 0) ok = False;
6171 if (!ok) {
6172 VG_(message)(Vg_DebugMsg,
6173 "ERROR: --ignore-range-below-sp: invalid syntax. "
6174 " Expected \"...=decimalnumber-decimalnumber\".\n");
6175 return False;
6177 if (offs1 > 1000*1000 /*arbitrary*/ || offs2 > 1000*1000 /*ditto*/) {
6178 VG_(message)(Vg_DebugMsg,
6179 "ERROR: --ignore-range-below-sp: suspiciously large "
6180 "offset(s): %u and %u\n", offs1, offs2);
6181 return False;
6183 if (offs1 <= offs2) {
6184 VG_(message)(Vg_DebugMsg,
6185 "ERROR: --ignore-range-below-sp: invalid offsets "
6186 "(the first must be larger): %u and %u\n", offs1, offs2);
6187 return False;
6189 tl_assert(offs1 > offs2);
6190 if (offs1 - offs2 > 4096 /*arbitrary*/) {
6191 VG_(message)(Vg_DebugMsg,
6192 "ERROR: --ignore-range-below-sp: suspiciously large "
6193 "range: %u-%u (size %u)\n", offs1, offs2, offs1 - offs2);
6194 return False;
6196 MC_(clo_ignore_range_below_sp) = True;
6197 MC_(clo_ignore_range_below_sp__first_offset) = offs1;
6198 MC_(clo_ignore_range_below_sp__last_offset) = offs2;
6199 return True;
6202 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
6203 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}
6205 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
6206 MC_(clo_keep_stacktraces), KS_alloc) {}
6207 else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
6208 MC_(clo_keep_stacktraces), KS_free) {}
6209 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
6210 MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
6211 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
6212 MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
6213 else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
6214 MC_(clo_keep_stacktraces), KS_none) {}
6216 else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
6217 MC_(clo_show_mismatched_frees)) {}
6218 else if VG_BOOL_CLO(arg, "--expensive-definedness-checks",
6219 MC_(clo_expensive_definedness_checks)) {}
6221 else if VG_BOOL_CLO(arg, "--xtree-leak",
6222 MC_(clo_xtree_leak)) {}
6223 else if VG_STR_CLO (arg, "--xtree-leak-file",
6224 MC_(clo_xtree_leak_file)) {}
6226 else
6227 return VG_(replacement_malloc_process_cmd_line_option)(arg);
6229 return True;
6232 bad_level:
6233 VG_(fmsg_bad_option)(arg,
6234 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
6237 static void mc_print_usage(void)
6239 VG_(printf)(
6240 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
6241 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
6242 " --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
6243 " [definite,possible]\n"
6244 " --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
6245 " [definite,possible]\n"
6246 " where kind is one of:\n"
6247 " definite indirect possible reachable all none\n"
6248 " --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
6249 " improving leak search false positive [all]\n"
6250 " where heur is one of:\n"
6251 " stdstring length64 newarray multipleinheritance all none\n"
6252 " --show-reachable=yes same as --show-leak-kinds=all\n"
6253 " --show-reachable=no --show-possibly-lost=yes\n"
6254 " same as --show-leak-kinds=definite,possible\n"
6255 " --show-reachable=no --show-possibly-lost=no\n"
6256 " same as --show-leak-kinds=definite\n"
6257 " --xtree-leak=no|yes output leak result in xtree format? [no]\n"
6258 " --xtree-leak-file=<file> xtree leak report file [xtleak.kcg.%%p]\n"
6259 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
6260 " --track-origins=no|yes show origins of undefined values? [no]\n"
6261 " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
6262 " --expensive-definedness-checks=no|yes\n"
6263 " Use extra-precise definedness tracking [no]\n"
6264 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
6265 " --freelist-big-blocks=<number> releases first blocks with size>= [1000000]\n"
6266 " --workaround-gcc296-bugs=no|yes self explanatory [no]. Deprecated.\n"
6267 " Use --ignore-range-below-sp instead.\n"
6268 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
6269 " --ignore-range-below-sp=<number>-<number> do not report errors for\n"
6270 " accesses at the given offsets below SP\n"
6271 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
6272 " --free-fill=<hexnumber> fill free'd areas with given value\n"
6273 " --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
6274 " stack trace(s) to keep for malloc'd/free'd areas [alloc-and-free]\n"
6275 " --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
6279 static void mc_print_debug_usage(void)
6281 VG_(printf)(
6282 " (none)\n"
6287 /*------------------------------------------------------------*/
6288 /*--- Client blocks ---*/
6289 /*------------------------------------------------------------*/
6291 /* Client block management:
6293 This is managed as an expanding array of client block descriptors.
6294 Indices of live descriptors are issued to the client, so it can ask
6295 to free them later. Therefore we cannot slide live entries down
6296 over dead ones. Instead we must use free/inuse flags and scan for
6297 an empty slot at allocation time. This in turn means allocation is
6298 relatively expensive, so we hope this does not happen too often.
6300 An unused block has start == size == 0
6303 /* type CGenBlock is defined in mc_include.h */
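/* Client-side sketch (illustrative only; it assumes the
   VALGRIND_CREATE_BLOCK and VALGRIND_DISCARD macros from memcheck.h):

      char* pool = malloc(4096);
      int   h    = VALGRIND_CREATE_BLOCK(pool, 4096, "my pool");
      ...
      VALGRIND_DISCARD(h);

   CREATE_BLOCK hands out a descriptor index (allocated by
   alloc_client_block below); DISCARD marks that slot unused again so it
   can be reused. */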
6305 /* This subsystem is self-initialising. */
6306 static UWord cgb_size = 0;
6307 static UWord cgb_used = 0;
6308 static CGenBlock* cgbs = NULL;
6310 /* Stats for this subsystem. */
6311 static ULong cgb_used_MAX = 0; /* Max in use. */
6312 static ULong cgb_allocs = 0; /* Number of allocs. */
6313 static ULong cgb_discards = 0; /* Number of discards. */
6314 static ULong cgb_search = 0; /* Number of searches. */
6317 /* Get access to the client block array. */
6318 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
6319 /*OUT*/UWord* nBlocks )
6321 *blocks = cgbs;
6322 *nBlocks = cgb_used;
6326 static
6327 Int alloc_client_block ( void )
6329 UWord i, sz_new;
6330 CGenBlock* cgbs_new;
6332 cgb_allocs++;
6334 for (i = 0; i < cgb_used; i++) {
6335 cgb_search++;
6336 if (cgbs[i].start == 0 && cgbs[i].size == 0)
6337 return i;
6340 /* Not found. Try to allocate one at the end. */
6341 if (cgb_used < cgb_size) {
6342 cgb_used++;
6343 return cgb_used-1;
6346 /* Ok, we have to allocate a new one. */
6347 tl_assert(cgb_used == cgb_size);
6348 sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
6350 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
6351 for (i = 0; i < cgb_used; i++)
6352 cgbs_new[i] = cgbs[i];
6354 if (cgbs != NULL)
6355 VG_(free)( cgbs );
6356 cgbs = cgbs_new;
6358 cgb_size = sz_new;
6359 cgb_used++;
6360 if (cgb_used > cgb_used_MAX)
6361 cgb_used_MAX = cgb_used;
6362 return cgb_used-1;
6366 static void show_client_block_stats ( void )
6368 VG_(message)(Vg_DebugMsg,
6369 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6370 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
6373 static void print_monitor_help ( void )
6375 VG_(gdb_printf)
6377 "\n"
6378 "memcheck monitor commands:\n"
6379 " xb <addr> [<len>]\n"
6380 " prints validity bits for <len> (or 1) bytes at <addr>\n"
6381 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6382 "        Then prints the byte values below the corresponding validity bits\n"
6383 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6384 " Example: xb 0x8049c78 10\n"
6385 " get_vbits <addr> [<len>]\n"
6386 "        Similar to xb, but only prints the validity bytes by groups of 4.\n"
6387 " make_memory [noaccess|undefined\n"
6388 " |defined|Definedifaddressable] <addr> [<len>]\n"
6389 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6390 " check_memory [addressable|defined] <addr> [<len>]\n"
6391 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6392 " and outputs a description of <addr>\n"
6393 " leak_check [full*|summary|xtleak]\n"
6394 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6395 " [heuristics heur1,heur2,...]\n"
6396 " [increased*|changed|any]\n"
6397 " [unlimited*|limited <max_loss_records_output>]\n"
6398 " * = defaults\n"
6399 " xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
6400 " where kind is one of:\n"
6401 " definite indirect possible reachable all none\n"
6402 " where heur is one of:\n"
6403 " stdstring length64 newarray multipleinheritance all none*\n"
6404 " Examples: leak_check\n"
6405 " leak_check summary any\n"
6406 " leak_check full kinds indirect,possible\n"
6407 " leak_check full reachable any limited 100\n"
6408 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6409 " [unlimited*|limited <max_blocks>]\n"
6410 " [heuristics heur1,heur2,...]\n"
6411 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6412 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6413 " With heuristics, only shows the blocks found via heur1,heur2,...\n"
6414 " * = defaults\n"
6415 " who_points_at <addr> [<len>]\n"
6416 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6417 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6418 " with len > 1, will also show \"interior pointers\")\n"
6419 " xtmemory [<filename>]\n"
6420 " dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
6421 "\n");
6424 /* Print szB bytes at address, with a format similar to the gdb command
6425 x /<szB>xb address.
6426 res[i] == 1 indicates the corresponding byte is addressable. */
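/* Illustration (hypothetical address and byte values): for szB == 4 this
   prints a line resembling
      0x8049c78:   0x61   0x62   0x??   0x64
   where "0x??" marks a byte whose res[i] was not 1, i.e. unaddressable. */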
6427 static void gdb_xb (Addr address, SizeT szB, Int res[])
6429 UInt i;
6431 for (i = 0; i < szB; i++) {
6432 UInt bnr = i % 8;
6433 if (bnr == 0) {
6434 if (i != 0)
6435 VG_(printf) ("\n"); // Terminate previous line
6436 VG_(printf) ("%p:", (void*)(address+i));
6438 if (res[i] == 1)
6439 VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
6440 else
6441 VG_(printf) ("\t0x??");
6443 VG_(printf) ("\n"); // Terminate the last line
6447 /* Returns the address of the next non-space character,
6448 or the address of the string terminator. */
6449 static HChar* next_non_space (HChar *s)
6451 while (*s && *s == ' ')
6452 s++;
6453 return s;
6456 /* Parse an integer slice, i.e. a single integer or a range of integers.
6457 Syntax is:
6458 <integer>[..<integer> ]
6459 (spaces are allowed before and/or after ..).
6460 Return True if range correctly parsed, False otherwise. */
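/* For illustration (hypothetical values), "5..10", "5 ..10" and "5 .. 10"
   all parse to *from == 5, *to == 10, while a lone "5" gives
   *from == *to == 5. */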
6461 static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
6462 UInt *from, UInt *to)
6464 HChar* wl;
6465 HChar *endptr;
6466 endptr = NULL;
6467 wl = VG_(strtok_r) (s, " ", saveptr);
6469 /* slice must start with an integer. */
6470 if (wl == NULL) {
6471 VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
6472 return False;
6474 *from = VG_(strtoull10) (wl, &endptr);
6475 if (endptr == wl) {
6476 VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
6477 return False;
6480 if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
6481 /* wl token is an integer terminating the string
6482 or else next token does not start with .
6483 In both cases, the slice is a single integer. */
6484 *to = *from;
6485 return True;
6488 if (*endptr == '\0') {
6489 // iii .. => get the next token
6490 wl = VG_(strtok_r) (NULL, " .", saveptr);
6491 } else {
6492 // It must be iii..
6493 if (*endptr != '.' && *(endptr+1) != '.') {
6494 VG_(gdb_printf) ("expecting slice <from>..<to>\n");
6495 return False;
6497 if ( *(endptr+2) == ' ') {
6498 // It must be iii.. jjj => get the next token
6499 wl = VG_(strtok_r) (NULL, " .", saveptr);
6500 } else {
6501 // It must be iii..jjj
6502 wl = endptr+2;
6506 *to = VG_(strtoull10) (wl, &endptr);
6507 if (*endptr != '\0') {
6508 VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
6509 return False;
6512 if (*from > *to) {
6513 VG_(gdb_printf) ("<from> cannot be bigger than <to> "
6514 "in slice <from>..<to>\n");
6515 return False;
6518 return True;
6521 /* return True if request recognised, False otherwise */
6522 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
6524 HChar* wcmd;
6525 HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
6526 HChar *ssaveptr;
6528 VG_(strcpy) (s, req);
6530 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
6531 /* NB: if possible, avoid introducing a new command below which
6532 starts with the same first letter(s) as an already existing
6533 command. This ensures a shorter abbreviation for the user. */
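/* For instance, with the keyword set just below, "l" is already enough to
   select leak_check, whereas "x" on its own is ambiguous between xb and
   xtmemory (assuming VG_(keyword_id) accepts unambiguous prefixes, as the
   "multiple matches" case suggests). */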
6534 switch (VG_(keyword_id)
6535 ("help get_vbits leak_check make_memory check_memory "
6536 "block_list who_points_at xb xtmemory",
6537 wcmd, kwd_report_duplicated_matches)) {
6538 case -2: /* multiple matches */
6539 return True;
6540 case -1: /* not found */
6541 return False;
6542 case 0: /* help */
6543 print_monitor_help();
6544 return True;
6545 case 1: { /* get_vbits */
6546 Addr address;
6547 SizeT szB = 1;
6548 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6549 UChar vbits;
6550 Int i;
6551 Int unaddressable = 0;
6552 for (i = 0; i < szB; i++) {
6553 Int res = mc_get_or_set_vbits_for_client
6554 (address+i, (Addr) &vbits, 1,
6555 False, /* get them */
6556 False /* is client request */ );
6557 /* we are before the first character on the next line, print a \n. */
6558 if ((i % 32) == 0 && i != 0)
6559 VG_(printf) ("\n");
6560 /* we are at the start of the next block of 4, print a space. */
6561 else if ((i % 4) == 0 && i != 0)
6562 VG_(printf) (" ");
6563 if (res == 1) {
6564 VG_(printf) ("%02x", vbits);
6565 } else {
6566 tl_assert(3 == res);
6567 unaddressable++;
6568 VG_(printf) ("__");
6571 VG_(printf) ("\n");
6572 if (unaddressable) {
6573 VG_(printf)
6574 ("Address %p len %lu has %d bytes unaddressable\n",
6575 (void *)address, szB, unaddressable);
6578 return True;
6580 case 2: { /* leak_check */
6581 Int err = 0;
6582 LeakCheckParams lcp;
6583 HChar* xt_filename = NULL;
6584 HChar* kw;
6586 lcp.mode = LC_Full;
6587 lcp.show_leak_kinds = R2S(Possible) | R2S(Unreached);
6588 lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
6589 lcp.heuristics = 0;
6590 lcp.deltamode = LCD_Increased;
6591 lcp.max_loss_records_output = 999999999;
6592 lcp.requested_by_monitor_command = True;
6593 lcp.xt_filename = NULL;
6595 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
6596 kw != NULL;
6597 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6598 switch (VG_(keyword_id)
6599 ("full summary xtleak "
6600 "kinds reachable possibleleak definiteleak "
6601 "heuristics "
6602 "increased changed any "
6603 "unlimited limited ",
6604 kw, kwd_report_all)) {
6605 case -2: err++; break;
6606 case -1: err++; break;
6607 case 0: /* full */
6608 lcp.mode = LC_Full; break;
6609 case 1: /* summary */
6610 lcp.mode = LC_Summary; break;
6611 case 2: /* xtleak */
6612 lcp.mode = LC_Full;
6613 xt_filename
6614 = VG_(expand_file_name)("--xtleak-mc_main.c",
6615 "xtleak.kcg.%p.%n");
6616 lcp.xt_filename = xt_filename;
6617 break;
6618 case 3: { /* kinds */
6619 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6620 if (wcmd == NULL
6621 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
6622 True/*allow_all*/,
6623 wcmd,
6624 &lcp.show_leak_kinds)) {
6625 VG_(gdb_printf) ("missing or malformed leak kinds set\n");
6626 err++;
6628 break;
6630 case 4: /* reachable */
6631 lcp.show_leak_kinds = MC_(all_Reachedness)();
6632 break;
6633 case 5: /* possibleleak */
6634 lcp.show_leak_kinds
6635 = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
6636 break;
6637 case 6: /* definiteleak */
6638 lcp.show_leak_kinds = R2S(Unreached);
6639 break;
6640 case 7: { /* heuristics */
6641 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6642 if (wcmd == NULL
6643 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6644 True,/*allow_all*/
6645 wcmd,
6646 &lcp.heuristics)) {
6647 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6648 err++;
6650 break;
6652 case 8: /* increased */
6653 lcp.deltamode = LCD_Increased; break;
6654 case 9: /* changed */
6655 lcp.deltamode = LCD_Changed; break;
6656 case 10: /* any */
6657 lcp.deltamode = LCD_Any; break;
6658 case 11: /* unlimited */
6659 lcp.max_loss_records_output = 999999999; break;
6660 case 12: { /* limited */
6661 Int int_value;
6662 const HChar* endptr;
6664 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6665 if (wcmd == NULL) {
6666 int_value = 0;
6667 endptr = "empty"; /* to report an error below */
6668 } else {
6669 HChar *the_end;
6670 int_value = VG_(strtoll10) (wcmd, &the_end);
6671 endptr = the_end;
6673 if (*endptr != '\0')
6674 VG_(gdb_printf) ("missing or malformed integer value\n");
6675 else if (int_value > 0)
6676 lcp.max_loss_records_output = (UInt) int_value;
6677 else
6678 VG_(gdb_printf) ("max_loss_records_output must be >= 1,"
6679 " got %d\n", int_value);
6680 break;
6682 default:
6683 tl_assert (0);
6686 if (!err)
6687 MC_(detect_memory_leaks)(tid, &lcp);
6688 if (xt_filename != NULL)
6689 VG_(free)(xt_filename);
6690 return True;
6693 case 3: { /* make_memory */
6694 Addr address;
6695 SizeT szB = 1;
6696 Int kwdid = VG_(keyword_id)
6697 ("noaccess undefined defined Definedifaddressable",
6698 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6699 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6700 return True;
6701 switch (kwdid) {
6702 case -2: break;
6703 case -1: break;
6704 case 0: MC_(make_mem_noaccess) (address, szB); break;
6705 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
6706 MC_OKIND_USER ); break;
6707 case 2: MC_(make_mem_defined) ( address, szB ); break;
6708 case 3: make_mem_defined_if_addressable ( address, szB ); break;
6709 default: tl_assert(0);
6711 return True;
6714 case 4: { /* check_memory */
6715 Addr address;
6716 SizeT szB = 1;
6717 Addr bad_addr;
6718 UInt okind;
6719 const HChar* src;
6720 UInt otag;
6721 UInt ecu;
6722 ExeContext* origin_ec;
6723 MC_ReadResult res;
6725 Int kwdid = VG_(keyword_id)
6726 ("addressable defined",
6727 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6728 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6729 return True;
6730 switch (kwdid) {
6731 case -2: break;
6732 case -1: break;
6733 case 0: /* addressable */
6734 if (is_mem_addressable ( address, szB, &bad_addr ))
6735 VG_(printf) ("Address %p len %lu addressable\n",
6736 (void *)address, szB);
6737 else
6738 VG_(printf)
6739 ("Address %p len %lu not addressable:\nbad address %p\n",
6740 (void *)address, szB, (void *) bad_addr);
6741 MC_(pp_describe_addr) (address);
6742 break;
6743 case 1: /* defined */
6744 res = is_mem_defined ( address, szB, &bad_addr, &otag );
6745 if (MC_AddrErr == res)
6746 VG_(printf)
6747 ("Address %p len %lu not addressable:\nbad address %p\n",
6748 (void *)address, szB, (void *) bad_addr);
6749 else if (MC_ValueErr == res) {
6750 okind = otag & 3;
6751 switch (okind) {
6752 case MC_OKIND_STACK:
6753 src = " was created by a stack allocation"; break;
6754 case MC_OKIND_HEAP:
6755 src = " was created by a heap allocation"; break;
6756 case MC_OKIND_USER:
6757 src = " was created by a client request"; break;
6758 case MC_OKIND_UNKNOWN:
6759 src = ""; break;
6760 default: tl_assert(0);
6762 VG_(printf)
6763 ("Address %p len %lu not defined:\n"
6764 "Uninitialised value at %p%s\n",
6765 (void *)address, szB, (void *) bad_addr, src);
6766 ecu = otag & ~3;
6767 if (VG_(is_plausible_ECU)(ecu)) {
6768 origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
6769 VG_(pp_ExeContext)( origin_ec );
6772 else
6773 VG_(printf) ("Address %p len %lu defined\n",
6774 (void *)address, szB);
6775 MC_(pp_describe_addr) (address);
6776 break;
6777 default: tl_assert(0);
6779 return True;
6782 case 5: { /* block_list */
6783 HChar* wl;
6784 HChar *the_end;
6785 UInt lr_nr_from = 0;
6786 UInt lr_nr_to = 0;
6788 if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
6789 UInt limit_blocks = 999999999;
6790 Int int_value;
6791 UInt heuristics = 0;
6793 for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
6794 wl != NULL;
6795 wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6796 switch (VG_(keyword_id) ("unlimited limited heuristics ",
6797 wl, kwd_report_all)) {
6798 case -2: return True;
6799 case -1: return True;
6800 case 0: /* unlimited */
6801 limit_blocks = 999999999; break;
6802 case 1: /* limited */
6803 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6804 if (wcmd == NULL) {
6805 VG_(gdb_printf) ("missing integer value\n");
6806 return True;
6808 int_value = VG_(strtoll10) (wcmd, &the_end);
6809 if (*the_end != '\0') {
6810 VG_(gdb_printf) ("malformed integer value\n");
6811 return True;
6813 if (int_value <= 0) {
6814 VG_(gdb_printf) ("max_blocks must be >= 1,"
6815 " got %d\n", int_value);
6816 return True;
6818 limit_blocks = (UInt) int_value;
6819 break;
6820 case 2: /* heuristics */
6821 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6822 if (wcmd == NULL
6823 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6824 True,/*allow_all*/
6825 wcmd,
6826 &heuristics)) {
6827 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6828 return True;
6830 break;
6831 default:
6832 tl_assert (0);
6835 /* subtract 1 from lr_nr_from/lr_nr_to as what is shown to the user
6836 is 1 more than the index in lr_array. */
6837 if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
6838 lr_nr_to-1,
6839 limit_blocks,
6840 heuristics))
6841 VG_(gdb_printf) ("invalid loss record nr\n");
6843 return True;
6846 case 6: { /* who_points_at */
6847 Addr address;
6848 SizeT szB = 1;
6850 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6851 return True;
6852 if (address == (Addr) 0) {
6853 VG_(gdb_printf) ("Cannot search who points at 0x0\n");
6854 return True;
6856 MC_(who_points_at) (address, szB);
6857 return True;
6860 case 7: { /* xb */
6861 Addr address;
6862 SizeT szB = 1;
6863 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6864 UChar vbits[8];
6865 Int res[8];
6866 Int i;
6867 Int unaddressable = 0;
6868 for (i = 0; i < szB; i++) {
6869 Int bnr = i % 8;
6870 res[bnr] = mc_get_or_set_vbits_for_client
6871 (address+i, (Addr) &vbits[bnr], 1,
6872 False, /* get them */
6873 False /* is client request */ );
6874 /* We are about to print the first vabits of a new line.
6875 Terminate the previous line if needed: print a line with the
6876 address and the data. */
6877 if (bnr == 0) {
6878 if (i != 0) {
6879 VG_(printf) ("\n");
6880 gdb_xb (address + i - 8, 8, res);
6882 VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
6884 if (res[bnr] == 1) {
6885 VG_(printf) ("\t %02x", vbits[bnr]);
6886 } else {
6887 tl_assert(3 == res[bnr]);
6888 unaddressable++;
6889 VG_(printf) ("\t __");
6892 VG_(printf) ("\n");
6893 if (szB % 8 == 0 && szB > 0)
6894 gdb_xb (address + szB - 8, 8, res);
6895 else
6896 gdb_xb (address + szB - szB % 8, szB % 8, res);
6897 if (unaddressable) {
6898 VG_(printf)
6899 ("Address %p len %lu has %d bytes unaddressable\n",
6900 (void *)address, szB, unaddressable);
6903 return True;
6906 case 8: { /* xtmemory */
6907 HChar* filename;
6908 filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
6909 MC_(xtmemory_report)(filename, False);
6910 return True;
6913 default:
6914 tl_assert(0);
6915 return False;
6919 /*------------------------------------------------------------*/
6920 /*--- Client requests ---*/
6921 /*------------------------------------------------------------*/
6923 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
6925 Int i;
6926 Addr bad_addr;
6928 if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
6929 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
6930 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
6931 && VG_USERREQ__FREELIKE_BLOCK != arg[0]
6932 && VG_USERREQ__CREATE_MEMPOOL != arg[0]
6933 && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
6934 && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
6935 && VG_USERREQ__MEMPOOL_FREE != arg[0]
6936 && VG_USERREQ__MEMPOOL_TRIM != arg[0]
6937 && VG_USERREQ__MOVE_MEMPOOL != arg[0]
6938 && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
6939 && VG_USERREQ__MEMPOOL_EXISTS != arg[0]
6940 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]
6941 && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
6942 && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
6943 return False;
6945 switch (arg[0]) {
6946 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
6947 Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
6948 if (!ok)
6949 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
6950 *ret = ok ? (UWord)NULL : bad_addr;
6951 break;
6954 case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
6955 Bool errorV = False;
6956 Addr bad_addrV = 0;
6957 UInt otagV = 0;
6958 Bool errorA = False;
6959 Addr bad_addrA = 0;
6960 is_mem_defined_comprehensive(
6961 arg[1], arg[2],
6962 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
6964 if (errorV) {
6965 MC_(record_user_error) ( tid, bad_addrV,
6966 /*isAddrErr*/False, otagV );
6968 if (errorA) {
6969 MC_(record_user_error) ( tid, bad_addrA,
6970 /*isAddrErr*/True, 0 );
6972 /* Return the lower of the two erring addresses, if any. */
6973 *ret = 0;
6974 if (errorV && !errorA) {
6975 *ret = bad_addrV;
6977 if (!errorV && errorA) {
6978 *ret = bad_addrA;
6980 if (errorV && errorA) {
6981 *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
6983 break;
6986 case VG_USERREQ__DO_LEAK_CHECK: {
6987 LeakCheckParams lcp;
6989 if (arg[1] == 0)
6990 lcp.mode = LC_Full;
6991 else if (arg[1] == 1)
6992 lcp.mode = LC_Summary;
6993 else {
6994 VG_(message)(Vg_UserMsg,
6995 "Warning: unknown memcheck leak search mode\n");
6996 lcp.mode = LC_Full;
6999 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
7000 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
7001 lcp.heuristics = MC_(clo_leak_check_heuristics);
7003 if (arg[2] == 0)
7004 lcp.deltamode = LCD_Any;
7005 else if (arg[2] == 1)
7006 lcp.deltamode = LCD_Increased;
7007 else if (arg[2] == 2)
7008 lcp.deltamode = LCD_Changed;
7009 else {
7010 VG_(message)
7011 (Vg_UserMsg,
7012 "Warning: unknown memcheck leak search deltamode\n");
7013 lcp.deltamode = LCD_Any;
7015 lcp.max_loss_records_output = 999999999;
7016 lcp.requested_by_monitor_command = False;
7017 lcp.xt_filename = NULL;
7019 MC_(detect_memory_leaks)(tid, &lcp);
7020 *ret = 0; /* return value is meaningless */
7021 break;
7024 case VG_USERREQ__MAKE_MEM_NOACCESS:
7025 MC_(make_mem_noaccess) ( arg[1], arg[2] );
7026 *ret = -1;
7027 break;
7029 case VG_USERREQ__MAKE_MEM_UNDEFINED:
7030 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
7031 MC_OKIND_USER );
7032 *ret = -1;
7033 break;
7035 case VG_USERREQ__MAKE_MEM_DEFINED:
7036 MC_(make_mem_defined) ( arg[1], arg[2] );
7037 *ret = -1;
7038 break;
7040 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
7041 make_mem_defined_if_addressable ( arg[1], arg[2] );
7042 *ret = -1;
7043 break;
7045 case VG_USERREQ__CREATE_BLOCK: /* describe a block */
7046 if (arg[1] != 0 && arg[2] != 0) {
7047 i = alloc_client_block();
7048 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
7049 cgbs[i].start = arg[1];
7050 cgbs[i].size = arg[2];
7051 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
7052 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
7053 *ret = i;
7054 } else
7055 *ret = -1;
7056 break;
7058 case VG_USERREQ__DISCARD: /* discard */
7059 if (cgbs == NULL
7060 || arg[2] >= cgb_used ||
7061 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
7062 *ret = 1;
7063 } else {
7064 tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
7065 cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
7066 VG_(free)(cgbs[arg[2]].desc);
7067 cgb_discards++;
7068 *ret = 0;
7070 break;
7072 case VG_USERREQ__GET_VBITS:
7073 *ret = mc_get_or_set_vbits_for_client
7074 ( arg[1], arg[2], arg[3],
7075 False /* get them */,
7076 True /* is client request */ );
7077 break;
7079 case VG_USERREQ__SET_VBITS:
7080 *ret = mc_get_or_set_vbits_for_client
7081 ( arg[1], arg[2], arg[3],
7082 True /* set them */,
7083 True /* is client request */ );
7084 break;
7086 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
7087 UWord** argp = (UWord**)arg;
7088 // MC_(bytes_leaked) et al were set by the last leak check (or zero
7089 // if no prior leak checks performed).
7090 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
7091 *argp[2] = MC_(bytes_dubious);
7092 *argp[3] = MC_(bytes_reachable);
7093 *argp[4] = MC_(bytes_suppressed);
7094 // there is no argp[5]
7095 //*argp[5] = MC_(bytes_indirect);
7096 // XXX need to make *argp[1-4] defined; currently done in the
7097 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
7098 *ret = 0;
7099 return True;
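      /* Client-side sketch (macro name assumed from memcheck.h):
            unsigned long leaked, dubious, reachable, suppressed;
            VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
         After a prior leak check this fills the four counters from the
         MC_(bytes_*) totals used above; the macro zero-initialises its
         arguments, which is what keeps *argp[1-4] defined. */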
7101 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
7102 UWord** argp = (UWord**)arg;
7103 // MC_(blocks_leaked) et al were set by the last leak check (or zero
7104 // if no prior leak checks performed).
7105 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
7106 *argp[2] = MC_(blocks_dubious);
7107 *argp[3] = MC_(blocks_reachable);
7108 *argp[4] = MC_(blocks_suppressed);
7109 // there is no argp[5]
7110 //*argp[5] = MC_(blocks_indirect);
7111 // XXX need to make *argp[1-4] defined; currently done in the
7112 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
7113 *ret = 0;
7114 return True;
7116 case VG_USERREQ__MALLOCLIKE_BLOCK: {
7117 Addr p = (Addr)arg[1];
7118 SizeT sizeB = arg[2];
7119 UInt rzB = arg[3];
7120 Bool is_zeroed = (Bool)arg[4];
7122 MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
7123 MC_AllocCustom, MC_(malloc_list) );
7124 if (rzB > 0) {
7125 MC_(make_mem_noaccess) ( p - rzB, rzB);
7126 MC_(make_mem_noaccess) ( p + sizeB, rzB);
7128 return True;
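      /* Client-side sketch (macros are in valgrind.h; pool, sizes and rzB
         are hypothetical):
            p = my_pool_alloc(pool, 100);
            VALGRIND_MALLOCLIKE_BLOCK(p, 100, rzB, 0 /*is_zeroed*/);
            ...
            VALGRIND_FREELIKE_BLOCK(p, rzB);
         The two make_mem_noaccess calls above shadow rzB bytes on either side
         of the block, assuming the custom allocator really reserved them. */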
7130 case VG_USERREQ__RESIZEINPLACE_BLOCK: {
7131 Addr p = (Addr)arg[1];
7132 SizeT oldSizeB = arg[2];
7133 SizeT newSizeB = arg[3];
7134 UInt rzB = arg[4];
7136 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
7137 return True;
7139 case VG_USERREQ__FREELIKE_BLOCK: {
7140 Addr p = (Addr)arg[1];
7141 UInt rzB = arg[2];
7143 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
7144 return True;
7147 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
7148 HChar* s = (HChar*)arg[1];
7149 Addr dst = (Addr) arg[2];
7150 Addr src = (Addr) arg[3];
7151 SizeT len = (SizeT)arg[4];
7152 MC_(record_overlap_error)(tid, s, src, dst, len);
7153 return True;
7156 case VG_USERREQ__CREATE_MEMPOOL: {
7157 Addr pool = (Addr)arg[1];
7158 UInt rzB = arg[2];
7159 Bool is_zeroed = (Bool)arg[3];
7160 UInt flags = arg[4];
7162 // The create_mempool function does not know about these mempool flags,
7163 // so pass them as booleans.
7164 MC_(create_mempool) ( pool, rzB, is_zeroed,
7165 (flags & VALGRIND_MEMPOOL_AUTO_FREE),
7166 (flags & VALGRIND_MEMPOOL_METAPOOL) );
7167 return True;
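      /* Client-side sketch (macro names assumed from valgrind.h/memcheck.h):
            VALGRIND_CREATE_MEMPOOL_EXT(pool, rzB, 0 /*is_zeroed*/,
                                        VALGRIND_MEMPOOL_METAPOOL);
            VALGRIND_MEMPOOL_ALLOC(pool, chunk, sz);
            VALGRIND_MEMPOOL_FREE(pool, chunk);
            VALGRIND_DESTROY_MEMPOOL(pool);
         The flag word arriving in arg[4] is decoded above into the two
         booleans that MC_(create_mempool) expects. */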
7170 case VG_USERREQ__DESTROY_MEMPOOL: {
7171 Addr pool = (Addr)arg[1];
7173 MC_(destroy_mempool) ( pool );
7174 return True;
7177 case VG_USERREQ__MEMPOOL_ALLOC: {
7178 Addr pool = (Addr)arg[1];
7179 Addr addr = (Addr)arg[2];
7180 UInt size = arg[3];
7182 MC_(mempool_alloc) ( tid, pool, addr, size );
7183 return True;
7186 case VG_USERREQ__MEMPOOL_FREE: {
7187 Addr pool = (Addr)arg[1];
7188 Addr addr = (Addr)arg[2];
7190 MC_(mempool_free) ( pool, addr );
7191 return True;
7194 case VG_USERREQ__MEMPOOL_TRIM: {
7195 Addr pool = (Addr)arg[1];
7196 Addr addr = (Addr)arg[2];
7197 UInt size = arg[3];
7199 MC_(mempool_trim) ( pool, addr, size );
7200 return True;
7203 case VG_USERREQ__MOVE_MEMPOOL: {
7204 Addr poolA = (Addr)arg[1];
7205 Addr poolB = (Addr)arg[2];
7207 MC_(move_mempool) ( poolA, poolB );
7208 return True;
7211 case VG_USERREQ__MEMPOOL_CHANGE: {
7212 Addr pool = (Addr)arg[1];
7213 Addr addrA = (Addr)arg[2];
7214 Addr addrB = (Addr)arg[3];
7215 UInt size = arg[4];
7217 MC_(mempool_change) ( pool, addrA, addrB, size );
7218 return True;
7221 case VG_USERREQ__MEMPOOL_EXISTS: {
7222 Addr pool = (Addr)arg[1];
7224 *ret = (UWord) MC_(mempool_exists) ( pool );
7225 return True;
7228 case VG_USERREQ__GDB_MONITOR_COMMAND: {
7229 Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
7230 if (handled)
7231 *ret = 1;
7232 else
7233 *ret = 0;
7234 return handled;
7237 case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
7238 case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
7239 Bool addRange
7240 = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
7241 Bool ok
7242 = modify_ignore_ranges(addRange, arg[1], arg[2]);
7243 *ret = ok ? 1 : 0;
7244 return True;
7247 default:
7248 VG_(message)(
7249 Vg_UserMsg,
7250 "Warning: unknown memcheck client request code %llx\n",
7251 (ULong)arg[0]
7253 return False;
7255 return True;
7259 /*------------------------------------------------------------*/
7260 /*--- Crude profiling machinery. ---*/
7261 /*------------------------------------------------------------*/
7263 // We track a number of interesting events (using PROF_EVENT)
7264 // if MC_PROFILE_MEMORY is defined.
7266 #ifdef MC_PROFILE_MEMORY
7268 ULong MC_(event_ctr)[MCPE_LAST];
7270 /* Event counter names. Use the name of the function that increases the
7271 event counter. Drop any MC_() and mc_ prefixes. */
7272 static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
7273 [MCPE_LOADVN_SLOW] = "LOADVn_slow",
7274 [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
7275 [MCPE_STOREVN_SLOW] = "STOREVn_slow",
7276 [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
7277 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
7278 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
7279 "make_aligned_word32_undefined_slow",
7280 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
7281 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
7282 "make_aligned_word64_undefined_slow",
7283 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
7284 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
7285 "make_aligned_word32_noaccess_slow",
7286 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
7287 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
7288 "make_aligned_word64_noaccess_slow",
7289 [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
7290 [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
7291 [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
7292 [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
7293 [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
7294 [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
7295 [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
7296 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
7297 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
7298 [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
7299 [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
7300 [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
7301 [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
7302 [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
7303 [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
7304 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
7305 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
7306 "is_mem_defined_comprehensive(loop)",
7307 [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
7308 [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
7309 [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
7310 [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
7311 [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
7312 [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
7313 "set_address_range_perms(single-secmap)",
7314 [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
7315 "set_address_range_perms(startof-secmap)",
7316 [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
7317 "set_address_range_perms(multiple-secmaps)",
7318 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
7319 "set_address_range_perms(dist-sm1)",
7320 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
7321 "set_address_range_perms(dist-sm2)",
7322 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
7323 "set_address_range_perms(dist-sm1-quick)",
7324 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
7325 "set_address_range_perms(dist-sm2-quick)",
7326 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
7327 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
7328 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
7329 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
7330 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
7331 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
7332 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
7333 "set_address_range_perms(loop64K-free-dist-sm)",
7334 [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
7335 [MCPE_LOADV_128_OR_256] = "LOADV_128_or_256",
7336 [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
7337 [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
7338 [MCPE_LOADV64] = "LOADV64",
7339 [MCPE_LOADV64_SLOW1] = "LOADV64-slow1",
7340 [MCPE_LOADV64_SLOW2] = "LOADV64-slow2",
7341 [MCPE_STOREV64] = "STOREV64",
7342 [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
7343 [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
7344 [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
7345 [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
7346 [MCPE_LOADV32] = "LOADV32",
7347 [MCPE_LOADV32_SLOW1] = "LOADV32-slow1",
7348 [MCPE_LOADV32_SLOW2] = "LOADV32-slow2",
7349 [MCPE_STOREV32] = "STOREV32",
7350 [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
7351 [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
7352 [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
7353 [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
7354 [MCPE_LOADV16] = "LOADV16",
7355 [MCPE_LOADV16_SLOW1] = "LOADV16-slow1",
7356 [MCPE_LOADV16_SLOW2] = "LOADV16-slow2",
7357 [MCPE_STOREV16] = "STOREV16",
7358 [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
7359 [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
7360 [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
7361 [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
7362 [MCPE_LOADV8] = "LOADV8",
7363 [MCPE_LOADV8_SLOW1] = "LOADV8-slow1",
7364 [MCPE_LOADV8_SLOW2] = "LOADV8-slow2",
7365 [MCPE_STOREV8] = "STOREV8",
7366 [MCPE_STOREV8_SLOW1] = "STOREV8-slow1",
7367 [MCPE_STOREV8_SLOW2] = "STOREV8-slow2",
7368 [MCPE_STOREV8_SLOW3] = "STOREV8-slow3",
7369 [MCPE_STOREV8_SLOW4] = "STOREV8-slow4",
7370 [MCPE_NEW_MEM_STACK_4] = "new_mem_stack_4",
7371 [MCPE_NEW_MEM_STACK_8] = "new_mem_stack_8",
7372 [MCPE_NEW_MEM_STACK_12] = "new_mem_stack_12",
7373 [MCPE_NEW_MEM_STACK_16] = "new_mem_stack_16",
7374 [MCPE_NEW_MEM_STACK_32] = "new_mem_stack_32",
7375 [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
7376 [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
7377 [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
7378 [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
7379 [MCPE_DIE_MEM_STACK_4] = "die_mem_stack_4",
7380 [MCPE_DIE_MEM_STACK_8] = "die_mem_stack_8",
7381 [MCPE_DIE_MEM_STACK_12] = "die_mem_stack_12",
7382 [MCPE_DIE_MEM_STACK_16] = "die_mem_stack_16",
7383 [MCPE_DIE_MEM_STACK_32] = "die_mem_stack_32",
7384 [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
7385 [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
7386 [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
7387 [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
7388 [MCPE_NEW_MEM_STACK] = "new_mem_stack",
7389 [MCPE_DIE_MEM_STACK] = "die_mem_stack",
7390 [MCPE_MAKE_STACK_UNINIT_W_O] = "MAKE_STACK_UNINIT_w_o",
7391 [MCPE_MAKE_STACK_UNINIT_NO_O] = "MAKE_STACK_UNINIT_no_o",
7392 [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
7393 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
7394 = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
7395 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
7396 = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
7397 [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
7398 = "MAKE_STACK_UNINIT_128_no_o_slowcase",
7401 static void init_prof_mem ( void )
7403 Int i, name_count = 0;
7405 for (i = 0; i < MCPE_LAST; i++) {
7406 MC_(event_ctr)[i] = 0;
7407 if (MC_(event_ctr_name)[i] != NULL)
7408 ++name_count;
7411 /* Make sure every profiling event has a name */
7412 tl_assert(name_count == MCPE_LAST);
7415 static void done_prof_mem ( void )
7417 Int i, n;
7418 Bool spaced = False;
7419 for (i = n = 0; i < MCPE_LAST; i++) {
7420 if (!spaced && (n % 10) == 0) {
7421 VG_(printf)("\n");
7422 spaced = True;
7424 if (MC_(event_ctr)[i] > 0) {
7425 spaced = False;
7426 ++n;
7427 VG_(printf)( "prof mem event %3d: %11llu %s\n",
7428 i, MC_(event_ctr)[i],
7429 MC_(event_ctr_name)[i]);
7434 #else
7436 static void init_prof_mem ( void ) { }
7437 static void done_prof_mem ( void ) { }
7439 #endif
7442 /*------------------------------------------------------------*/
7443 /*--- Origin tracking stuff ---*/
7444 /*------------------------------------------------------------*/
7446 /*--------------------------------------------*/
7447 /*--- Origin tracking: load handlers ---*/
7448 /*--------------------------------------------*/
7450 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
7451 return or1 > or2 ? or1 : or2;
7454 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
7455 OCacheLine* line;
7456 UChar descr;
7457 UWord lineoff = oc_line_offset(a);
7458 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7460 if (OC_ENABLE_ASSERTIONS) {
7461 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7464 line = find_OCacheLine( a );
7466 descr = line->descr[lineoff];
7467 if (OC_ENABLE_ASSERTIONS) {
7468 tl_assert(descr < 0x10);
7471 if (LIKELY(0 == (descr & (1 << byteoff)))) {
7472 return 0;
7473 } else {
7474 return line->w32[lineoff];
7478 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
7479 OCacheLine* line;
7480 UChar descr;
7481 UWord lineoff, byteoff;
7483 if (UNLIKELY(a & 1)) {
7484 /* Handle misaligned case, slowly. */
7485 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
7486 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
7487 return merge_origins(oLo, oHi);
7490 lineoff = oc_line_offset(a);
7491 byteoff = a & 3; /* 0 or 2 */
7493 if (OC_ENABLE_ASSERTIONS) {
7494 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7496 line = find_OCacheLine( a );
7498 descr = line->descr[lineoff];
7499 if (OC_ENABLE_ASSERTIONS) {
7500 tl_assert(descr < 0x10);
7503 if (LIKELY(0 == (descr & (3 << byteoff)))) {
7504 return 0;
7505 } else {
7506 return line->w32[lineoff];
7510 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
7511 OCacheLine* line;
7512 UChar descr;
7513 UWord lineoff;
7515 if (UNLIKELY(a & 3)) {
7516 /* Handle misaligned case, slowly. */
7517 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
7518 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
7519 return merge_origins(oLo, oHi);
7522 lineoff = oc_line_offset(a);
7523 if (OC_ENABLE_ASSERTIONS) {
7524 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7527 line = find_OCacheLine( a );
7529 descr = line->descr[lineoff];
7530 if (OC_ENABLE_ASSERTIONS) {
7531 tl_assert(descr < 0x10);
7534 if (LIKELY(0 == descr)) {
7535 return 0;
7536 } else {
7537 return line->w32[lineoff];
7541 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
7542 OCacheLine* line;
7543 UChar descrLo, descrHi, descr;
7544 UWord lineoff;
7546 if (UNLIKELY(a & 7)) {
7547 /* Handle misaligned case, slowly. */
7548 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
7549 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
7550 return merge_origins(oLo, oHi);
7553 lineoff = oc_line_offset(a);
7554 if (OC_ENABLE_ASSERTIONS) {
7555 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7558 line = find_OCacheLine( a );
7560 descrLo = line->descr[lineoff + 0];
7561 descrHi = line->descr[lineoff + 1];
7562 descr = descrLo | descrHi;
7563 if (OC_ENABLE_ASSERTIONS) {
7564 tl_assert(descr < 0x10);
7567 if (LIKELY(0 == descr)) {
7568 return 0; /* both 32-bit chunks are defined */
7569 } else {
7570 UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
7571 UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
7572 return merge_origins(oLo, oHi);
7576 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
7577 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 );
7578 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 );
7579 UInt oBoth = merge_origins(oLo, oHi);
7580 return (UWord)oBoth;
7583 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
7584 UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 );
7585 UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 );
7586 UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 );
7587 UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 );
7588 UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
7589 merge_origins(oQ2, oQ3));
7590 return (UWord)oAll;
7594 /*--------------------------------------------*/
7595 /*--- Origin tracking: store handlers ---*/
7596 /*--------------------------------------------*/
7598 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
7599 OCacheLine* line;
7600 UWord lineoff = oc_line_offset(a);
7601 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7603 if (OC_ENABLE_ASSERTIONS) {
7604 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7607 line = find_OCacheLine( a );
7609 if (d32 == 0) {
7610 line->descr[lineoff] &= ~(1 << byteoff);
7611 } else {
7612 line->descr[lineoff] |= (1 << byteoff);
7613 line->w32[lineoff] = d32;
7617 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
7618 OCacheLine* line;
7619 UWord lineoff, byteoff;
7621 if (UNLIKELY(a & 1)) {
7622 /* Handle misaligned case, slowly. */
7623 MC_(helperc_b_store1)( a + 0, d32 );
7624 MC_(helperc_b_store1)( a + 1, d32 );
7625 return;
7628 lineoff = oc_line_offset(a);
7629 byteoff = a & 3; /* 0 or 2 */
7631 if (OC_ENABLE_ASSERTIONS) {
7632 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7635 line = find_OCacheLine( a );
7637 if (d32 == 0) {
7638 line->descr[lineoff] &= ~(3 << byteoff);
7639 } else {
7640 line->descr[lineoff] |= (3 << byteoff);
7641 line->w32[lineoff] = d32;
7645 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
7646 OCacheLine* line;
7647 UWord lineoff;
7649 if (UNLIKELY(a & 3)) {
7650 /* Handle misaligned case, slowly. */
7651 MC_(helperc_b_store2)( a + 0, d32 );
7652 MC_(helperc_b_store2)( a + 2, d32 );
7653 return;
7656 lineoff = oc_line_offset(a);
7657 if (OC_ENABLE_ASSERTIONS) {
7658 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7661 line = find_OCacheLine( a );
7663 if (d32 == 0) {
7664 line->descr[lineoff] = 0;
7665 } else {
7666 line->descr[lineoff] = 0xF;
7667 line->w32[lineoff] = d32;
7671 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
7672 OCacheLine* line;
7673 UWord lineoff;
7675 if (UNLIKELY(a & 7)) {
7676 /* Handle misaligned case, slowly. */
7677 MC_(helperc_b_store4)( a + 0, d32 );
7678 MC_(helperc_b_store4)( a + 4, d32 );
7679 return;
7682 lineoff = oc_line_offset(a);
7683 if (OC_ENABLE_ASSERTIONS) {
7684 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7687 line = find_OCacheLine( a );
7689 if (d32 == 0) {
7690 line->descr[lineoff + 0] = 0;
7691 line->descr[lineoff + 1] = 0;
7692 } else {
7693 line->descr[lineoff + 0] = 0xF;
7694 line->descr[lineoff + 1] = 0xF;
7695 line->w32[lineoff + 0] = d32;
7696 line->w32[lineoff + 1] = d32;
7700 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
7701 MC_(helperc_b_store8)( a + 0, d32 );
7702 MC_(helperc_b_store8)( a + 8, d32 );
7705 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
7706 MC_(helperc_b_store8)( a + 0, d32 );
7707 MC_(helperc_b_store8)( a + 8, d32 );
7708 MC_(helperc_b_store8)( a + 16, d32 );
7709 MC_(helperc_b_store8)( a + 24, d32 );
7713 /*--------------------------------------------*/
7714 /*--- Origin tracking: sarp handlers ---*/
7715 /*--------------------------------------------*/
7717 __attribute__((noinline))
7718 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
7719 if ((a & 1) && len >= 1) {
7720 MC_(helperc_b_store1)( a, otag );
7721 a++;
7722 len--;
7724 if ((a & 2) && len >= 2) {
7725 MC_(helperc_b_store2)( a, otag );
7726 a += 2;
7727 len -= 2;
7729 if (len >= 4)
7730 tl_assert(0 == (a & 3));
7731 while (len >= 4) {
7732 MC_(helperc_b_store4)( a, otag );
7733 a += 4;
7734 len -= 4;
7736 if (len >= 2) {
7737 MC_(helperc_b_store2)( a, otag );
7738 a += 2;
7739 len -= 2;
7741 if (len >= 1) {
7742 MC_(helperc_b_store1)( a, otag );
7743 //a++;
7744 len--;
7746 tl_assert(len == 0);
7749 __attribute__((noinline))
7750 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
7751 if ((a & 1) && len >= 1) {
7752 MC_(helperc_b_store1)( a, 0 );
7753 a++;
7754 len--;
7756 if ((a & 2) && len >= 2) {
7757 MC_(helperc_b_store2)( a, 0 );
7758 a += 2;
7759 len -= 2;
7761 if (len >= 4)
7762 tl_assert(0 == (a & 3));
7763 while (len >= 4) {
7764 MC_(helperc_b_store4)( a, 0 );
7765 a += 4;
7766 len -= 4;
7768 if (len >= 2) {
7769 MC_(helperc_b_store2)( a, 0 );
7770 a += 2;
7771 len -= 2;
7773 if (len >= 1) {
7774 MC_(helperc_b_store1)( a, 0 );
7775 //a++;
7776 len--;
7778 tl_assert(len == 0);
7782 /*------------------------------------------------------------*/
7783 /*--- Setup and finalisation ---*/
7784 /*------------------------------------------------------------*/
7786 static void mc_post_clo_init ( void )
7788 /* If we've been asked to emit XML, mash around various other
7789 options so as to constrain the output somewhat. */
7790 if (VG_(clo_xml)) {
7791 /* Extract as much info as possible from the leak checker. */
7792 MC_(clo_leak_check) = LC_Full;
7795 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)
7796 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7797 VG_(message)(Vg_UserMsg,
7798 "Warning: --freelist-big-blocks value %lld has no effect\n"
7799 "as it is >= the --freelist-vol value %lld\n",
7800 MC_(clo_freelist_big_blocks),
7801 MC_(clo_freelist_vol));
7804 if (MC_(clo_workaround_gcc296_bugs)
7805 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7806 VG_(umsg)(
7807 "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
7808 "Warning: Instead use: --ignore-range-below-sp=1024-1\n"
7809 "\n"
7813 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
7815 if (MC_(clo_mc_level) == 3) {
7816 /* We're doing origin tracking. */
7817 # ifdef PERF_FAST_STACK
7818 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU );
7819 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU );
7820 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU );
7821 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU );
7822 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU );
7823 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
7824 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
7825 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
7826 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
7827 # endif
7828 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU );
7829 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_make_ECU );
7830 } else {
7831 /* Not doing origin tracking */
7832 # ifdef PERF_FAST_STACK
7833 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
7834 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 );
7835 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 );
7836 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 );
7837 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 );
7838 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
7839 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
7840 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
7841 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
7842 # endif
7843 VG_(track_new_mem_stack) ( mc_new_mem_stack );
7844 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
7847 // We assume that brk()/sbrk() does not initialise new memory. Is this
7848 // accurate? John Reiser says:
7850 // 0) sbrk() can *decrease* process address space. No zero fill is done
7851 // for a decrease, not even the fragment on the high end of the last page
7852 // that is beyond the new highest address. For maximum safety and
7853 // portability, then the bytes in the last page that reside above [the
7854 // new] sbrk(0) should be considered to be uninitialized, but in practice
7855 // it is exceedingly likely that they will retain their previous
7856 // contents.
7858 // 1) If an increase is large enough to require new whole pages, then
7859 // those new whole pages (like all new pages) are zero-filled by the
7860 // operating system. So if sbrk(0) already is page aligned, then
7861 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
7863 // 2) Any increase that lies within an existing allocated page is not
7864 // changed. So if (x = sbrk(0)) is not page aligned, then
7865 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
7866 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
7867 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
7868 // of them come along for the ride because the operating system deals
7869 // only in whole pages. Again, for maximum safety and portability, then
7870 // anything that lives above [the new] sbrk(0) should be considered
7871 // uninitialized, but in practice will retain previous contents [zero in
7872 // this case.]"
7874 // In short:
7876 // A key property of sbrk/brk is that new whole pages that are supplied
7877 // by the operating system *do* get initialized to zero.
7879 // As for the portability of all this:
7881 // sbrk and brk are not POSIX. However, any system that is a derivative
7882 // of *nix has sbrk and brk because too much software (such as the
7883 // Bourne shell) relies on the traditional memory map (.text,
7884 // .data+.bss, stack) and the existence of sbrk/brk.
7886 // So we should arguably observe all this. However:
7887 // - The current inaccuracy has caused maybe one complaint in seven years(?)
7888 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
7889 // doubt most programmers know the above information.
7890 // So I'm not terribly unhappy with marking it as undefined. --njn.
7892 // [More: I think most of what John said only applies to sbrk(). It seems
7893 // that brk() always deals in whole pages. And since this event deals
7894 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
7895 // just mark all memory it allocates as defined.]
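   // Worked example (hypothetical numbers): with PAGE_SIZE == 4096 and
   // x == sbrk(0) == 0x601100 (not page aligned), sbrk(4096) moves the break
   // to 0x602100.  The 0xF00 bytes from 0x601100 up to 0x602000 lie in an
   // already-allocated page and keep their old contents; the fresh page
   // 0x602000..0x602FFF is zero-filled by the kernel.  Memcheck nonetheless
   // marks the whole newly brk'd range as undefined (except on Solaris, see
   // just below).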
7897 # if !defined(VGO_solaris)
7898 if (MC_(clo_mc_level) == 3)
7899 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_make_ECU );
7900 else
7901 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_no_ECU );
7902 # else
7903 // On Solaris, brk memory has to be marked as defined, otherwise we get
7904 // many false positives.
7905 VG_(track_new_mem_brk) ( make_mem_defined_w_tid );
7906 # endif
7908 /* This origin tracking cache is huge (~100M), so only initialise
7909 if we need it. */
7910 if (MC_(clo_mc_level) >= 3) {
7911 init_OCache();
7912 tl_assert(ocacheL1 != NULL);
7913 tl_assert(ocacheL2 != NULL);
7914 } else {
7915 tl_assert(ocacheL1 == NULL);
7916 tl_assert(ocacheL2 == NULL);
7919 MC_(chunk_poolalloc) = VG_(newPA)
7920 (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
7921 1000,
7922 VG_(malloc),
7923 "mc.cMC.1 (MC_Chunk pools)",
7924 VG_(free));
7926 /* Do not check definedness of guest state if --undef-value-errors=no */
7927 if (MC_(clo_mc_level) >= 2)
7928 VG_(track_pre_reg_read) ( mc_pre_reg_read );
7930 if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
7931 if (MC_(clo_keep_stacktraces) == KS_none
7932 || MC_(clo_keep_stacktraces) == KS_free)
7933 VG_(fmsg_bad_option)("--keep-stacktraces",
7934 "To use --xtree-memory=full, you must"
7935 " keep at least the alloc stacktrace\n");
7936 // Activate full xtree memory profiling.
7937 VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main));
7942 static void print_SM_info(const HChar* type, Int n_SMs)
7944 VG_(message)(Vg_DebugMsg,
7945 " memcheck: SMs: %s = %d (%luk, %luM)\n",
7946 type,
7947 n_SMs,
7948 n_SMs * sizeof(SecMap) / 1024UL,
7949 n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
7952 static void mc_print_stats (void)
7954 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
7956 VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
7957 VG_(free_queue_volume), VG_(free_queue_length));
7958 VG_(message)(Vg_DebugMsg,
7959 " memcheck: sanity checks: %d cheap, %d expensive\n",
7960 n_sanity_cheap, n_sanity_expensive );
7961 VG_(message)(Vg_DebugMsg,
7962 " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
7963 n_auxmap_L2_nodes,
7964 n_auxmap_L2_nodes * 64,
7965 n_auxmap_L2_nodes / 16 );
7966 VG_(message)(Vg_DebugMsg,
7967 " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
7968 n_auxmap_L1_searches, n_auxmap_L1_cmps,
7969 (10ULL * n_auxmap_L1_cmps)
7970 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
7972 VG_(message)(Vg_DebugMsg,
7973 " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
7974 n_auxmap_L2_searches, n_auxmap_L2_nodes
7977 print_SM_info("n_issued ", n_issued_SMs);
7978 print_SM_info("n_deissued ", n_deissued_SMs);
7979 print_SM_info("max_noaccess ", max_noaccess_SMs);
7980 print_SM_info("max_undefined", max_undefined_SMs);
7981 print_SM_info("max_defined ", max_defined_SMs);
7982 print_SM_info("max_non_DSM ", max_non_DSM_SMs);
7984 // Three DSMs, plus the non-DSM ones
7985 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
7986 // The 3*sizeof(Word) bytes is the AVL node metadata size.
7987 // The VG_ROUNDUP is because the OSet pool allocator will/must align
7988 // the elements on pointer size.
7989 // Note that the pool allocator has some additional small overhead
7990 // which is not counted in the below.
7991 // Hardwiring this logic sucks, but I don't see how else to do it.
7992 max_secVBit_szB = max_secVBit_nodes *
7993 (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
7994 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
7996 VG_(message)(Vg_DebugMsg,
7997 " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
7998 max_secVBit_nodes, max_secVBit_szB / 1024,
7999 max_secVBit_szB / (1024 * 1024));
8000 VG_(message)(Vg_DebugMsg,
8001 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
8002 sec_vbits_new_nodes + sec_vbits_updates,
8003 sec_vbits_new_nodes, sec_vbits_updates );
8004 VG_(message)(Vg_DebugMsg,
8005 " memcheck: max shadow mem size: %luk, %luM\n",
8006 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
8008 if (MC_(clo_mc_level) >= 3) {
8009 VG_(message)(Vg_DebugMsg,
8010 " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
8011 stats_ocacheL1_find,
8012 stats_ocacheL1_misses,
8013 stats_ocacheL1_lossage );
8014 VG_(message)(Vg_DebugMsg,
8015 " ocacheL1: %'12lu at 0 %'12lu at 1\n",
8016 stats_ocacheL1_find - stats_ocacheL1_misses
8017 - stats_ocacheL1_found_at_1
8018 - stats_ocacheL1_found_at_N,
8019 stats_ocacheL1_found_at_1 );
8020 VG_(message)(Vg_DebugMsg,
8021 " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
8022 stats_ocacheL1_found_at_N,
8023 stats_ocacheL1_movefwds );
8024 VG_(message)(Vg_DebugMsg,
8025 " ocacheL1: %'12lu sizeB %'12d useful\n",
8026 (SizeT)sizeof(OCache),
8027 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
8028 VG_(message)(Vg_DebugMsg,
8029 " ocacheL2: %'12lu refs %'12lu misses\n",
8030 stats__ocacheL2_refs,
8031 stats__ocacheL2_misses );
8032 VG_(message)(Vg_DebugMsg,
8033 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
8034 stats__ocacheL2_n_nodes_max,
8035 stats__ocacheL2_n_nodes );
8036 VG_(message)(Vg_DebugMsg,
8037 " niacache: %'12lu refs %'12lu misses\n",
8038 stats__nia_cache_queries, stats__nia_cache_misses);
8039 } else {
8040 tl_assert(ocacheL1 == NULL);
8041 tl_assert(ocacheL2 == NULL);
8046 static void mc_fini ( Int exitcode )
8048 MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True);
8049 MC_(print_malloc_stats)();
8051 if (MC_(clo_leak_check) != LC_Off) {
8052 LeakCheckParams lcp;
8053 HChar* xt_filename = NULL;
8054 lcp.mode = MC_(clo_leak_check);
8055 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
8056 lcp.heuristics = MC_(clo_leak_check_heuristics);
8057 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
8058 lcp.deltamode = LCD_Any;
8059 lcp.max_loss_records_output = 999999999;
8060 lcp.requested_by_monitor_command = False;
8061 if (MC_(clo_xtree_leak)) {
8062 xt_filename = VG_(expand_file_name)("--xtree-leak-file",
8063 MC_(clo_xtree_leak_file));
8064 lcp.xt_filename = xt_filename;
8065 lcp.mode = LC_Full;
8067 else
8068 lcp.xt_filename = NULL;
8069 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
8070 if (MC_(clo_xtree_leak))
8071 VG_(free)(xt_filename);
8072 } else {
8073 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8074 VG_(umsg)(
8075 "For a detailed leak analysis, rerun with: --leak-check=full\n"
8076 "\n"
8081 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8082 VG_(message)(Vg_UserMsg,
8083 "For counts of detected and suppressed errors, rerun with: -v\n");
8086 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
8087 && MC_(clo_mc_level) == 2) {
8088 VG_(message)(Vg_UserMsg,
8089 "Use --track-origins=yes to see where "
8090 "uninitialised values come from\n");
8093 /* Print a warning if any client-request generated ignore-ranges
8094 still exist. It would be reasonable to expect that a properly
8095 written program would remove any such ranges before exiting, and
8096 since they are a bit on the dangerous side, let's comment. By
8097 contrast ranges which are specified on the command line normally
8098 pertain to hardware mapped into the address space, and so we
8099 can't expect the client to have got rid of them. */
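   /* Properly paired client usage looks like this (a sketch; macro names
      assumed from memcheck.h):
         VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(buf, len);
         ... touch device-mapped or otherwise special memory ...
         VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(buf, len);
      A program that exits with a disablement still in force triggers the
      warning emitted below. */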
8100 if (gIgnoredAddressRanges) {
8101 UInt i, nBad = 0;
8102 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
8103 UWord val = IAR_INVALID;
8104 UWord key_min = ~(UWord)0;
8105 UWord key_max = (UWord)0;
8106 VG_(indexRangeMap)( &key_min, &key_max, &val,
8107 gIgnoredAddressRanges, i );
8108 if (val != IAR_ClientReq)
8109 continue;
8110 /* Print the offending range. Also, if it is the first,
8111 print a banner before it. */
8112 nBad++;
8113 if (nBad == 1) {
8114 VG_(umsg)(
8115 "WARNING: exiting program has the following client-requested\n"
8116 "WARNING: address error disablement range(s) still in force,\n"
8117 "WARNING: "
8118 "possibly as a result of some mistake in the use of the\n"
8119 "WARNING: "
8120 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
8123 VG_(umsg)(" [%u] 0x%016lx-0x%016lx %s\n",
8124 i, key_min, key_max, showIARKind(val));
8128 done_prof_mem();
8130 if (VG_(clo_stats))
8131 mc_print_stats();
8133 if (0) {
8134 VG_(message)(Vg_DebugMsg,
8135 "------ Valgrind's client block stats follow ---------------\n" );
8136 show_client_block_stats();
8137 }
8138 }
8140 /* Mark the given addr/len unaddressable for the watchpoint implementation.
8141 The PointKind will be handled at access time. */
8142 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
8143 Addr addr, SizeT len)
8144 {
8145 /* GDBTD: this is somewhat fishy. We should probably save the previous
8146 accessibility and definedness in gdbserver, so that they can be restored
8147 properly. Currently, we assume that the user only watches things
8148 which are properly addressable and defined. */
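/* An illustration (added for clarity) of the caveat described above; the
   variable and the gdb session are hypothetical:

       int x;            // uninitialised
       (gdb) watch x     // insert: x's bytes become noaccess
       (gdb) delete 1    // remove: x's bytes become addressable AND defined
       if (x > 0) ...    // this use of x is now NOT flagged as undefined

   i.e. deleting a watchpoint currently launders the definedness of the
   watched bytes, which is why saving and restoring the real A/V state
   would be preferable. */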
8149 if (insert)
8150 MC_(make_mem_noaccess) (addr, len);
8151 else
8152 MC_(make_mem_defined) (addr, len);
8153 return True;
8154 }
8156 static void mc_pre_clo_init(void)
8157 {
8158 VG_(details_name) ("Memcheck");
8159 VG_(details_version) (NULL);
8160 VG_(details_description) ("a memory error detector");
8161 VG_(details_copyright_author)(
8162 "Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.");
8163 VG_(details_bug_reports_to) (VG_BUGS_TO);
8164 VG_(details_avg_translation_sizeB) ( 640 );
8166 VG_(basic_tool_funcs) (mc_post_clo_init,
8167 MC_(instrument),
8168 mc_fini);
8170 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) );
8173 VG_(needs_core_errors) ();
8174 VG_(needs_tool_errors) (MC_(eq_Error),
8175 MC_(before_pp_Error),
8176 MC_(pp_Error),
8177 True,/*show TIDs for errors*/
8178 MC_(update_Error_extra),
8179 MC_(is_recognised_suppression),
8180 MC_(read_extra_suppression_info),
8181 MC_(error_matches_suppression),
8182 MC_(get_error_name),
8183 MC_(get_extra_suppression_info),
8184 MC_(print_extra_suppression_use),
8185 MC_(update_extra_suppression_use));
8186 VG_(needs_libc_freeres) ();
8187 VG_(needs_cxx_freeres) ();
8188 VG_(needs_command_line_options)(mc_process_cmd_line_options,
8189 mc_print_usage,
8190 mc_print_debug_usage);
8191 VG_(needs_client_requests) (mc_handle_client_request);
8192 VG_(needs_sanity_checks) (mc_cheap_sanity_check,
8193 mc_expensive_sanity_check);
8194 VG_(needs_print_stats) (mc_print_stats);
8195 VG_(needs_info_location) (MC_(pp_describe_addr));
8196 VG_(needs_malloc_replacement) (MC_(malloc),
8197 MC_(__builtin_new),
8198 MC_(__builtin_vec_new),
8199 MC_(memalign),
8200 MC_(calloc),
8201 MC_(free),
8202 MC_(__builtin_delete),
8203 MC_(__builtin_vec_delete),
8204 MC_(realloc),
8205 MC_(malloc_usable_size),
8206 MC_MALLOC_DEFAULT_REDZONE_SZB );
8207 MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
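/* A rough picture (added for clarity; sizes are illustrative, the real
   redzone size is MC_(Malloc_Redzone_SzB) as queried above) of what the
   malloc replacement registration buys us:

       p = malloc(12):   [ redzone | 12 payload bytes | redzone ]
                           noaccess  addressable but    noaccess
                                     undefined

   A read or write that runs just past the payload lands in a noaccess
   redzone and is reported as an invalid access, and reads of the payload
   before it is written are reported as uses of uninitialised values. */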
8209 VG_(needs_xml_output) ();
8211 VG_(track_new_mem_startup) ( mc_new_mem_startup );
8213 // Handling of mmap and mprotect isn't simple (well, it is simple,
8214 // but the justification isn't). See comments above, just prior to
8215 // mc_new_mem_mmap.
8216 VG_(track_new_mem_mmap) ( mc_new_mem_mmap );
8217 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
8219 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) );
8221 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
8222 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) );
8223 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) );
8225 /* Defer the specification of the new_mem_stack functions to the
8226 post_clo_init function, since we need to first parse the command
8227 line before deciding which set to use. */
8229 # ifdef PERF_FAST_STACK
8230 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
8231 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 );
8232 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 );
8233 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 );
8234 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 );
8235 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 );
8236 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 );
8237 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 );
8238 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 );
8239 # endif
8240 VG_(track_die_mem_stack) ( mc_die_mem_stack );
8242 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) );
8244 VG_(track_pre_mem_read) ( check_mem_is_defined );
8245 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
8246 VG_(track_pre_mem_write) ( check_mem_is_addressable );
8247 VG_(track_post_mem_write) ( mc_post_mem_write );
8249 VG_(track_post_reg_write) ( mc_post_reg_write );
8250 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
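/* Note added for clarity: the register<->memory copy tracking registered
   below only matters when V bits are tracked. MC_(clo_mc_level) is 1 for
   addressability checking only (--undef-value-errors=no), 2 when
   undefined-value errors are also reported, and 3 when origin tracking
   (--track-origins=yes) is enabled as well; see the MC_(clo_mc_level)
   comments earlier in this file for the authoritative definition. */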
8252 if (MC_(clo_mc_level) >= 2) {
8253 VG_(track_copy_mem_to_reg) ( mc_copy_mem_to_reg );
8254 VG_(track_copy_reg_to_mem) ( mc_copy_reg_to_mem );
8255 }
8257 VG_(needs_watchpoint) ( mc_mark_unaddressable_for_watchpoint );
8259 init_shadow_memory();
8260 // MC_(chunk_poolalloc) must be allocated in post_clo_init
8261 tl_assert(MC_(chunk_poolalloc) == NULL);
8262 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" );
8263 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
8264 init_prof_mem();
8266 tl_assert( mc_expensive_sanity_check() );
8268 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
8269 tl_assert(sizeof(UWord) == sizeof(Addr));
8270 // Call me paranoid. I don't care.
8271 tl_assert(sizeof(void*) == sizeof(Addr));
8273 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
8274 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
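/* Worked example (added for clarity): VG_(log2)(16) == 4 but
   VG_(log2)(24) == -1, so the assertion above rejects any
   non-power-of-two value of BYTES_PER_SEC_VBIT_NODE. */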
8276 /* This is small. Always initialise it. */
8277 init_nia_to_ecu_cache();
8279 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
8280 whether we need to until the command line args have been
8281 processed. Hence defer it to mc_post_clo_init. */
8282 tl_assert(ocacheL1 == NULL);
8283 tl_assert(ocacheL2 == NULL);
8285 /* Check some important word-size and masking invariants. See the
8286 extensive comments above re UNALIGNED_OR_HIGH for background. */
8287 # if VG_WORDSIZE == 4
8288 tl_assert(sizeof(void*) == 4);
8289 tl_assert(sizeof(Addr) == 4);
8290 tl_assert(sizeof(UWord) == 4);
8291 tl_assert(sizeof(Word) == 4);
8292 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
8293 tl_assert(MASK(1) == 0UL);
8294 tl_assert(MASK(2) == 1UL);
8295 tl_assert(MASK(4) == 3UL);
8296 tl_assert(MASK(8) == 7UL);
8297 # else
8298 tl_assert(VG_WORDSIZE == 8);
8299 tl_assert(sizeof(void*) == 8);
8300 tl_assert(sizeof(Addr) == 8);
8301 tl_assert(sizeof(UWord) == 8);
8302 tl_assert(sizeof(Word) == 8);
8303 tl_assert(MAX_PRIMARY_ADDRESS == 0x1FFFFFFFFFULL);
8304 tl_assert(MASK(1) == 0xFFFFFFE000000000ULL);
8305 tl_assert(MASK(2) == 0xFFFFFFE000000001ULL);
8306 tl_assert(MASK(4) == 0xFFFFFFE000000003ULL);
8307 tl_assert(MASK(8) == 0xFFFFFFE000000007ULL);
8308 # endif
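/* A worked example (added for clarity, using the 64-bit constants asserted
   above) of what the MASK values encode: MASK(szB) is ~MAX_PRIMARY_ADDRESS
   with the low alignment bits (szB-1) also set, so for a 4-byte access

       addr = 0x0000001234 : addr & MASK(4) == 0            -> fast path
       addr = 0x0000001236 : addr & MASK(4) == 0x2          -> misaligned
       addr = 0x2000001234 : addr & MASK(4) == 0x2000000000 -> above
                                                 MAX_PRIMARY_ADDRESS

   i.e. a single AND catches both "unaligned" and "too high for the primary
   map" in one test, which is what the UNALIGNED_OR_HIGH check mentioned
   above relies on. */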
8310 /* Check some assertions to do with the instrumentation machinery. */
8311 MC_(do_instrumentation_startup_checks)();
8312 }
8314 STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));
8316 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
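/* For contrast with the large registration block above, a minimal Valgrind
   tool's pre_clo_init reduces to the same pattern: fill in the details,
   hand the three basic callbacks to the core, and emit the
   interface-version marker. This is a sketch loosely modelled on the
   shipped "none" (Nulgrind) tool; the xx_* names are illustrative only and
   the instrument() parameter list should be checked against
   pub_tool_tooliface.h for the Valgrind version in use:

       #include "pub_tool_basics.h"
       #include "pub_tool_tooliface.h"

       static void xx_post_clo_init(void) { }

       static IRSB* xx_instrument ( VgCallbackClosure* closure,
                                    IRSB* sb_in,
                                    const VexGuestLayout* layout,
                                    const VexGuestExtents* vge,
                                    const VexArchInfo* archinfo_host,
                                    IRType gWordTy, IRType hWordTy )
       {
          return sb_in;   // pass the superblock through unchanged
       }

       static void xx_fini ( Int exitcode ) { }

       static void xx_pre_clo_init(void)
       {
          VG_(details_name)            ("ExampleTool");
          VG_(details_version)         (NULL);
          VG_(details_description)     ("an example of the registration pattern");
          VG_(details_copyright_author)("illustrative only");
          VG_(details_bug_reports_to)  (VG_BUGS_TO);
          VG_(basic_tool_funcs)        (xx_post_clo_init,
                                        xx_instrument,
                                        xx_fini);
       }

       VG_DETERMINE_INTERFACE_VERSION(xx_pre_clo_init)
*/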
8318 /*--------------------------------------------------------------------*/
8319 /*--- end mc_main.c ---*/
8320 /*--------------------------------------------------------------------*/