/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- MemCheck: Maintain bitmaps of memory, tracking the          ---*/
/*--- accessibility (A) and validity (V) status of each byte.     ---*/
/*---                                                    mc_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2017 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_tool_basics.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_gdbserver.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_machine.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_options.h"
#include "pub_tool_oset.h"
#include "pub_tool_rangemap.h"
#include "pub_tool_replacemalloc.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_xarray.h"
#include "pub_tool_xtree.h"
#include "pub_tool_xtmemory.h"

#include "mc_include.h"
#include "memcheck.h"   /* for client requests */
/* Set to 1 to do a little more sanity checking */
#define VG_DEBUG_MEMORY 0

#define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)

static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */


/*------------------------------------------------------------*/
/*--- Fast-case knobs                                      ---*/
/*------------------------------------------------------------*/

// Comment these out to disable the fast cases (don't just set them to zero).

/* PERF_FAST_LOADV is in mc_include.h */
#define PERF_FAST_STOREV 1

#define PERF_FAST_SARP 1

#define PERF_FAST_STACK 1
#define PERF_FAST_STACK2 1

/* Change this to 1 to enable assertions on origin tracking cache fast
   paths */
#define OC_ENABLE_ASSERTIONS 0

/* Change this to 1 for experimental, higher precision origin tracking
   8- and 16-bit store handling. */
#define OC_PRECISION_STORE 1
/*------------------------------------------------------------*/
/*--- Comments on the origin tracking implementation       ---*/
/*------------------------------------------------------------*/

/* See detailed comment entitled
   AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   which is contained further on in this file. */


/*------------------------------------------------------------*/
/*--- V bits and A bits                                    ---*/
/*------------------------------------------------------------*/

/* Conceptually, every byte value has 8 V bits, which track whether Memcheck
   thinks the corresponding value bit is defined.  And every memory byte
   has an A bit, which tracks whether Memcheck thinks the program can access
   it safely (ie. it's mapped, and has at least one of the RWX permission bits
   set).  So every N-bit register is shadowed with N V bits, and every memory
   byte is shadowed with 8 V bits and one A bit.

   In the implementation, we use two forms of compression (compressed V bits
   and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
   for memory.

   Memcheck also tracks extra information about each heap block that is
   allocated, for detecting memory leaks and other purposes.
*/
/*------------------------------------------------------------*/
/*--- Basic A/V bitmap representation.                     ---*/
/*------------------------------------------------------------*/

/* All reads and writes are checked against a memory map (a.k.a. shadow
   memory), which records the state of all memory in the process.

   On 32-bit machines the memory map is organised as follows.
   The top 16 bits of an address are used to index into a top-level
   map table, containing 65536 entries.  Each entry is a pointer to a
   second-level map, which records the accessibility and validity
   permissions for the 65536 bytes indexed by the lower 16 bits of the
   address.  Each byte is represented by two bits (details are below).  So
   each second-level map contains 16384 bytes.  This two-level arrangement
   conveniently divides the 4G address space into 64k lumps, each size 64k
   bytes.

   All entries in the primary (top-level) map must point to a valid
   secondary (second-level) map.  Since many of the 64kB chunks will
   have the same status for every bit -- ie. noaccess (for unused
   address space) or entirely addressable and defined (for code segments) --
   there are three distinguished secondary maps, which indicate 'noaccess',
   'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
   map entry points to the relevant distinguished map.  In practice,
   typically more than half of the addressable memory is represented with
   the 'undefined' or 'defined' distinguished secondary map, so it gives a
   good saving.  It also lets us set the V+A bits of large address regions
   quickly in set_address_range_perms().

   On 64-bit machines it's more complicated.  If we followed the same basic
   scheme we'd have a four-level table which would require too many memory
   accesses.  So instead the top-level map table has 2^21 entries (indexed
   using bits 16..36 of the address); this covers the bottom 128GB.  Any
   accesses above 128GB are handled with a slow, sparse auxiliary table.
   Valgrind's address space manager tries very hard to keep things below
   this 128GB barrier so that performance doesn't suffer too much.

   Note that this file has a lot of different functions for reading and
   writing shadow memory.  Only a couple are strictly necessary (eg.
   get_vabits2 and set_vabits2), most are just specialised for specific
   common cases to improve performance.

   Aside: the V+A bits are less precise than they could be -- we have no way
   of marking memory as read-only.  It would be great if we could add an
   extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
   which requires 2.3 bits to hold, and there's no way to do that elegantly
   -- we'd have to double up to 4 bits of metadata per byte, which doesn't
   seem worth it.
*/
/* --------------- Basic configuration --------------- */

/* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */

#if VG_WORDSIZE == 4

/* cover the entire address space */
#  define N_PRIMARY_BITS  16

#else

/* Just handle the first 128G fast and the rest via auxiliary
   primaries.  If you change this, Memcheck will assert at startup.
   See the definition of UNALIGNED_OR_HIGH for extensive comments. */
#  define N_PRIMARY_BITS  21

#endif


/* Do not change this. */
#define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)

/* Do not change this. */
#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
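
/* Illustrative sketch (not part of the build): how an address is split
   for the two-level lookup described above.  The address 'a' is
   hypothetical; the real lookups are done by get_secmap_low_ptr() and
   SM_OFF() further down in this file. */
#if 0
static void example_decompose_address ( Addr a )
{
   if (a <= MAX_PRIMARY_ADDRESS) {
      UWord pm_index = a >> 16;       /* which primary_map[] slot          */
      UWord sm_byte  = a & 0xffff;    /* which byte within that secondary  */
      UWord sm_off   = sm_byte >> 2;  /* which vabits8 chunk (each chunk
                                         shadows 4 bytes of memory)        */
      VG_(printf)("primary %lu, chunk %lu\n", pm_index, sm_off);
   } else {
      /* Above the fast-mapped region: handled by the sparse auxmap. */
   }
}
#endif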
/* --------------- Secondary maps --------------- */

// Each byte of memory conceptually has an A bit, which indicates its
// addressability, and 8 V bits, which indicate its definedness.
//
// But because very few bytes are partially defined, we can use a nice
// compression scheme to reduce the size of shadow memory.  Each byte of
// memory has 2 bits which indicate its state (ie. V+A bits):
//
//   00:  noaccess    (unaddressable but treated as fully defined)
//   01:  undefined   (addressable and fully undefined)
//   10:  defined     (addressable and fully defined)
//   11:  partdefined (addressable and partially defined)
//
// In the "partdefined" case, we use a secondary table to store the V bits.
// Each entry in the secondary-V-bits table maps a byte address to its 8 V
// bits.
//
// We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
// four bytes (32 bits) of memory are in each chunk.  Hence the name
// "vabits8".  This lets us get the V+A bits for four bytes at a time
// easily (without having to do any shifting and/or masking), and that is a
// very common operation.  (Note that although each vabits8 chunk
// is 8 bits in size, it represents 32 bits of memory.)
//
// The representation is "inverse" little-endian... each 4 bytes of
// memory is represented by a 1 byte value, where:
//
// - the status of byte (a+0) is held in bits [1..0]
// - the status of byte (a+1) is held in bits [3..2]
// - the status of byte (a+2) is held in bits [5..4]
// - the status of byte (a+3) is held in bits [7..6]
//
// It's "inverse" because endianness normally describes a mapping from
// value bits to memory addresses; in this case the mapping is inverted.
// Ie. instead of particular value bits being held in certain addresses, in
// this case certain addresses are represented by particular value bits.
// See insert_vabits2_into_vabits8() for an example.
//
// But note that we don't compress the V bits stored in registers; they
// need to be explicit to make the shadow operations possible.  Therefore
// when moving values between registers and memory we need to convert
// between the expanded in-register format and the compressed in-memory
// format.  This isn't so difficult, it just requires careful attention in a
// few places.

// These represent eight bits of memory.
#define VA_BITS2_NOACCESS     0x0      // 00b
#define VA_BITS2_UNDEFINED    0x1      // 01b
#define VA_BITS2_DEFINED      0x2      // 10b
#define VA_BITS2_PARTDEFINED  0x3      // 11b
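
/* Worked example (a sketch, not part of the build): packing four
   per-byte states into one vabits8 using the "inverse" layout described
   above.  The byte states chosen here are arbitrary. */
#if 0
   /* byte a+0 defined, a+1 undefined, a+2 noaccess, a+3 defined:
        bits [1..0] = 10, [3..2] = 01, [5..4] = 00, [7..6] = 10
      => vabits8 = 0b10_00_01_10 = 0x86 */
   UChar example_vabits8 = (VA_BITS2_DEFINED   << 0)
                         | (VA_BITS2_UNDEFINED << 2)
                         | (VA_BITS2_NOACCESS  << 4)
                         | (VA_BITS2_DEFINED   << 6);   /* == 0x86 */
#endif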
// These represent 16 bits of memory.
#define VA_BITS4_NOACCESS     0x0      // 00_00b
#define VA_BITS4_UNDEFINED    0x5      // 01_01b
#define VA_BITS4_DEFINED      0xa      // 10_10b

// These represent 32 bits of memory.
#define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
#define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
#define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b

// These represent 64 bits of memory.
#define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
#define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
#define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2

// These represent 128 bits of memory.
#define VA_BITS32_UNDEFINED   0x55555555  // 01_01_01_01b x 4


#define SM_CHUNKS             16384    // Each SM covers 64k of memory.
#define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
#define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)

// Paranoia:  it's critical for performance that the requested inlining
// occurs.  So try extra hard.
#define INLINE    inline __attribute__((always_inline))
static INLINE Addr start_of_this_sm ( Addr a ) {
   return (a & (~SM_MASK));
}
static INLINE Bool is_start_of_sm ( Addr a ) {
   return (start_of_this_sm(a) == a);
}

STATIC_ASSERT(SM_CHUNKS % 2 == 0);

typedef
   union {
      UChar  vabits8[SM_CHUNKS];
      UShort vabits16[SM_CHUNKS/2];
   }
   SecMap;

// 3 distinguished secondary maps, one for no-access, one for
// accessible but undefined, and one for accessible and defined.
// Distinguished secondaries may never be modified.
#define SM_DIST_NOACCESS   0
#define SM_DIST_UNDEFINED  1
#define SM_DIST_DEFINED    2

static SecMap sm_distinguished[3];

static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
   return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
}

// Forward declaration
static void update_SM_counts(SecMap* oldSM, SecMap* newSM);

/* dist_sm points to one of our three distinguished secondaries.  Make
   a copy of it so that we can write to it.
*/
static SecMap* copy_for_writing ( SecMap* dist_sm )
{
   SecMap* new_sm;
   tl_assert(dist_sm == &sm_distinguished[0]
          || dist_sm == &sm_distinguished[1]
          || dist_sm == &sm_distinguished[2]);

   SysRes sres = VG_(am_shadow_alloc)(sizeof(SecMap));
   if (sr_isError(sres))
      VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
                                   sizeof(SecMap), sr_Err(sres) );
   new_sm = (void *)(Addr)sr_Res(sres);
   VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
   update_SM_counts(dist_sm, new_sm);
   return new_sm;
}
/* --------------- Stats --------------- */

static Int   n_issued_SMs      = 0;
static Int   n_deissued_SMs    = 0;
static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
static Int   n_undefined_SMs   = 0;
static Int   n_defined_SMs     = 0;
static Int   n_non_DSM_SMs     = 0;
static Int   max_noaccess_SMs  = 0;
static Int   max_undefined_SMs = 0;
static Int   max_defined_SMs   = 0;
static Int   max_non_DSM_SMs   = 0;

/* # searches initiated in auxmap_L1, and # base cmps required */
static ULong n_auxmap_L1_searches  = 0;
static ULong n_auxmap_L1_cmps      = 0;
/* # of searches that missed in auxmap_L1 and therefore had to
   be handed to auxmap_L2.  And the number of nodes inserted. */
static ULong n_auxmap_L2_searches  = 0;
static ULong n_auxmap_L2_nodes     = 0;

static Int   n_sanity_cheap     = 0;
static Int   n_sanity_expensive = 0;

static Int   n_secVBit_nodes   = 0;
static Int   max_secVBit_nodes = 0;
static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
{
   if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
   else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
   else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
   else                                                  { n_non_DSM_SMs  --;
                                                            n_deissued_SMs ++; }

   if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
   else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
   else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
   else                                                  { n_non_DSM_SMs  ++;
                                                            n_issued_SMs   ++; }

   if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
   if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
   if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
   if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
}
/* --------------- Primary maps --------------- */

/* The main primary map.  This covers some initial part of the address
   space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
   handled using the auxiliary primary map.
*/
#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
    && (defined(VGP_arm_linux) \
        || defined(VGP_x86_linux) || defined(VGP_x86_solaris) || defined(VGP_x86_freebsd))
/* mc_main_asm.c needs visibility on a few things declared in this file.
   MC_MAIN_STATIC allows to define them static if ok, i.e. on
   platforms that are not using hand-coded asm statements. */
#define MC_MAIN_STATIC
#else
#define MC_MAIN_STATIC static
#endif
MC_MAIN_STATIC SecMap* primary_map[N_PRIMARY_MAP];
/* An entry in the auxiliary primary map.  base must be a 64k-aligned
   value, and sm points at the relevant secondary map.  As with the
   main primary map, the secondary may be either a real secondary, or
   one of the three distinguished secondaries.  DO NOT CHANGE THIS
   LAYOUT: the first word has to be the key for OSet fast lookups.
*/
typedef
   struct {
      Addr    base;
      SecMap* sm;
   }
   AuxMapEnt;

/* Tunable parameter: How big is the L1 queue? */
#define N_AUXMAP_L1 24

/* Tunable parameter: How far along the L1 queue to insert
   entries resulting from L2 lookups? */
#define AUXMAP_L1_INSERT_IX 12

static struct {
          Addr       base;
          AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
       }
       auxmap_L1[N_AUXMAP_L1];

static OSet* auxmap_L2 = NULL;
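
/* Lookup-path sketch (not part of the build, and only an approximation):
   what maybe_find_in_auxmap() below effectively does for an address above
   MAX_PRIMARY_ADDRESS. */
#if 0
   AuxMapEnt key, *res;
   a &= ~(Addr)0xFFFF;                 /* normalise to the 64k chunk base */
   /* 1. scan the small auxmap_L1 array (self-organising: hits are moved
         towards the front); */
   /* 2. on an L1 miss, look the chunk base up in the auxmap_L2 OSet; */
   key.base = a;  key.sm = 0;
   res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
   /* 3. if found in L2, re-insert the entry into L1 at position
         AUXMAP_L1_INSERT_IX so subsequent lookups hit the fast path. */
#endif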
static void init_auxmap_L1_L2 ( void )
{
   Int i;
   for (i = 0; i < N_AUXMAP_L1; i++) {
      auxmap_L1[i].base = 0;
      auxmap_L1[i].ent  = NULL;
   }

   tl_assert(0 == offsetof(AuxMapEnt,base));
   tl_assert(sizeof(Addr) == sizeof(void*));
   auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
                                    /*fastCmp*/ NULL,
                                    VG_(malloc), "mc.iaLL.1", VG_(free) );
}
/* Check representation invariants; if OK return NULL; else a
   descriptive bit of text.  Also return the number of
   non-distinguished secondary maps referred to from the auxiliary
   primary maps. */

static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
{
   Word i, j;
   /* On a 32-bit platform, the L2 and L1 tables should
      both remain empty forever.

      On a 64-bit platform:
      In the L2 table:
       all .base & 0xFFFF == 0
       all .base > MAX_PRIMARY_ADDRESS
      In the L1 table:
       all .base & 0xFFFF == 0
       all (.base > MAX_PRIMARY_ADDRESS
            .base & 0xFFFF == 0
            and .ent points to an AuxMapEnt with the same .base)
           or
           (.base == 0 and .ent == NULL)
   */
   *n_secmaps_found = 0;
   if (sizeof(void*) == 4) {
      /* 32-bit platform */
      if (VG_(OSetGen_Size)(auxmap_L2) != 0)
         return "32-bit: auxmap_L2 is non-empty";
      for (i = 0; i < N_AUXMAP_L1; i++)
         if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
            return "32-bit: auxmap_L1 is non-empty";
   } else {
      /* 64-bit platform */
      UWord elems_seen = 0;
      AuxMapEnt *elem, *res;
      AuxMapEnt key;
      /* L2 table */
      VG_(OSetGen_ResetIter)(auxmap_L2);
      while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
         elems_seen++;
         if (0 != (elem->base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
         if (elem->base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
         if (elem->sm == NULL)
            return "64-bit: .sm in _L2 is NULL";
         if (!is_distinguished_sm(elem->sm))
            (*n_secmaps_found)++;
      }
      if (elems_seen != n_auxmap_L2_nodes)
         return "64-bit: disagreement on number of elems in _L2";
      /* Check L1-L2 correspondence */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
            continue;
         if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
            return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
         if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
            return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
         if (auxmap_L1[i].ent == NULL)
            return "64-bit: .ent is NULL in auxmap_L1";
         if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
            return "64-bit: _L1 and _L2 bases are inconsistent";
         /* Look it up in auxmap_L2. */
         key.base = auxmap_L1[i].base;
         key.sm   = 0;
         res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
         if (res == NULL)
            return "64-bit: _L1 .base not found in _L2";
         if (res != auxmap_L1[i].ent)
            return "64-bit: _L1 .ent disagrees with _L2 entry";
      }
      /* Check L1 contains no duplicates */
      for (i = 0; i < N_AUXMAP_L1; i++) {
         if (auxmap_L1[i].base == 0)
            continue;
         for (j = i+1; j < N_AUXMAP_L1; j++) {
            if (auxmap_L1[j].base == 0)
               continue;
            if (auxmap_L1[j].base == auxmap_L1[i].base)
               return "64-bit: duplicate _L1 .base entries";
         }
      }
   }
   return NULL; /* ok */
}
static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
{
   Word i;
   tl_assert(ent);
   tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
   for (i = N_AUXMAP_L1-1; i > rank; i--)
      auxmap_L1[i] = auxmap_L1[i-1];
   auxmap_L1[rank].base = ent->base;
   auxmap_L1[rank].ent  = ent;
}
static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
{
   AuxMapEnt  key;
   AuxMapEnt* res;
   Word       i;

   tl_assert(a > MAX_PRIMARY_ADDRESS);
   a &= ~(Addr)0xFFFF;

   /* First search the front-cache, which is a self-organising
      list containing the most popular entries. */

   if (LIKELY(auxmap_L1[0].base == a))
      return auxmap_L1[0].ent;
   if (LIKELY(auxmap_L1[1].base == a)) {
      Addr       t_base = auxmap_L1[0].base;
      AuxMapEnt* t_ent  = auxmap_L1[0].ent;
      auxmap_L1[0].base = auxmap_L1[1].base;
      auxmap_L1[0].ent  = auxmap_L1[1].ent;
      auxmap_L1[1].base = t_base;
      auxmap_L1[1].ent  = t_ent;
      return auxmap_L1[0].ent;
   }

   n_auxmap_L1_searches++;

   for (i = 0; i < N_AUXMAP_L1; i++) {
      if (auxmap_L1[i].base == a) {
         break;
      }
   }
   tl_assert(i >= 0 && i <= N_AUXMAP_L1);

   n_auxmap_L1_cmps += (ULong)(i+1);

   if (i < N_AUXMAP_L1) {
      if (i > 0) {
         Addr       t_base = auxmap_L1[i-1].base;
         AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
         auxmap_L1[i-1].base = auxmap_L1[i-0].base;
         auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
         auxmap_L1[i-0].base = t_base;
         auxmap_L1[i-0].ent  = t_ent;
         i--;
      }
      return auxmap_L1[i].ent;
   }

   n_auxmap_L2_searches++;

   /* First see if we already have it. */
   key.base = a;
   key.sm   = 0;

   res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
   if (res)
      insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
   return res;
}
static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
{
   AuxMapEnt *nyu, *res;

   /* First see if we already have it. */
   res = maybe_find_in_auxmap( a );
   if (LIKELY(res))
      return res;

   /* Ok, there's no entry in the secondary map, so we'll have
      to allocate one. */
   a &= ~(Addr)0xFFFF;

   nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
   nyu->base = a;
   nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
   VG_(OSetGen_Insert)( auxmap_L2, nyu );
   insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
   n_auxmap_L2_nodes++;
   return nyu;
}
/* --------------- SecMap fundamentals --------------- */

// In all these, 'low' means it's definitely in the main primary map,
// 'high' means it's definitely in the auxiliary table.

static INLINE UWord get_primary_map_low_offset ( Addr a )
{
   UWord pm_off = a >> 16;
   return pm_off;
}

static INLINE SecMap** get_secmap_low_ptr ( Addr a )
{
   UWord pm_off = a >> 16;
#  if VG_DEBUG_MEMORY >= 1
   tl_assert(pm_off < N_PRIMARY_MAP);
#  endif
   return &primary_map[ pm_off ];
}

static INLINE SecMap** get_secmap_high_ptr ( Addr a )
{
   AuxMapEnt* am = find_or_alloc_in_auxmap(a);
   return &am->sm;
}

static INLINE SecMap** get_secmap_ptr ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_low_ptr(a)
          : get_secmap_high_ptr(a));
}

static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
{
   return *get_secmap_low_ptr(a);
}

static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
{
   return *get_secmap_high_ptr(a);
}

static INLINE SecMap* get_secmap_for_writing_low(Addr a)
{
   SecMap** p = get_secmap_low_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
{
   SecMap** p = get_secmap_high_ptr(a);
   if (UNLIKELY(is_distinguished_sm(*p)))
      *p = copy_for_writing(*p);
   return *p;
}

/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may be a distinguished one as the caller will only want to
   be able to read it.
*/
static INLINE SecMap* get_secmap_for_reading ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_reading_low (a)
          : get_secmap_for_reading_high(a) );
}

/* Produce the secmap for 'a', either from the primary map or by
   ensuring there is an entry for it in the aux primary map.  The
   secmap may not be a distinguished one, since the caller will want
   to be able to write it.  If it is a distinguished secondary, make a
   writable copy of it, install it, and return the copy instead.  (COW
   semantics).
*/
static INLINE SecMap* get_secmap_for_writing ( Addr a )
{
   return ( a <= MAX_PRIMARY_ADDRESS
          ? get_secmap_for_writing_low (a)
          : get_secmap_for_writing_high(a) );
}

/* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
   allocate one if one doesn't already exist.  This is used by the
   leak checker.
*/
static SecMap* maybe_get_secmap_for ( Addr a )
{
   if (a <= MAX_PRIMARY_ADDRESS) {
      return get_secmap_for_reading_low(a);
   } else {
      AuxMapEnt* am = maybe_find_in_auxmap(a);
      return am ? am->sm : NULL;
   }
}
/* --------------- Fundamental functions --------------- */

static INLINE
void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
{
   UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
   *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
   *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
}

static INLINE
void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift     =  (a & 2)   << 1;        // shift by 0 or 4
   *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
   *vabits8 |=  (vabits4  << shift);   // mask  in the four new bits
}

static INLINE
UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
   vabits8 >>= shift;                  // shift the two bits to the bottom
   return 0x3 & vabits8;               // mask out the rest
}

static INLINE
UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
{
   UInt shift;
   tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   shift = (a & 2) << 1;               // shift by 0 or 4
   vabits8 >>= shift;                  // shift the four bits to the bottom
   return 0xf & vabits8;               // mask out the rest
}

// Note that these four are only used in slow cases.  The fast cases do
// clever things like combine the auxmap check (in
// get_secmap_{read,writ}able) with alignment checks.

// *** WARNING! ***
// Any time this function is called, if it is possible that vabits2
// is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
// sec-V-bits table must also be set!
static INLINE
void set_vabits2 ( Addr a, UChar vabits2 )
{
   SecMap* sm     = get_secmap_for_writing(a);
   UWord   sm_off = SM_OFF(a);
   insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
}

static INLINE
UChar get_vabits2 ( Addr a )
{
   SecMap* sm      = get_secmap_for_reading(a);
   UWord   sm_off  = SM_OFF(a);
   UChar   vabits8 = sm->vabits8[sm_off];
   return extract_vabits2_from_vabits8(a, vabits8);
}

// *** WARNING! ***
// Any time this function is called, if it is possible that any of the
// 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
// corresponding entry(s) in the sec-V-bits table must also be set!
static INLINE
UChar get_vabits8_for_aligned_word32 ( Addr a )
{
   SecMap* sm      = get_secmap_for_reading(a);
   UWord   sm_off  = SM_OFF(a);
   UChar   vabits8 = sm->vabits8[sm_off];
   return vabits8;
}

static INLINE
void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
{
   SecMap* sm     = get_secmap_for_writing(a);
   UWord   sm_off = SM_OFF(a);
   sm->vabits8[sm_off] = vabits8;
}
// Forward declarations
static UWord get_sec_vbits8(Addr a);
static void  set_sec_vbits8(Addr a, UWord vbits8);

// Returns False if there was an addressability error.
static INLINE
Bool set_vbits8 ( Addr a, UChar vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);
   if ( VA_BITS2_NOACCESS != vabits2 ) {
      // Addressable.  Convert in-register format to in-memory format.
      // Also remove any existing sec V bit entry for the byte if no
      // longer necessary.
      if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
      else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
      else                                    { vabits2 = VA_BITS2_PARTDEFINED;
                                                set_sec_vbits8(a, vbits8);  }
      set_vabits2(a, vabits2);

   } else {
      // Unaddressable!  Do nothing -- when writing to unaddressable
      // memory it acts as a black hole, and the V bits can never be seen
      // again.  So we don't have to write them at all.
      ok = False;
   }
   return ok;
}

// Returns False if there was an addressability error.  In that case, we put
// all defined bits into vbits8.
static INLINE
Bool get_vbits8 ( Addr a, UChar* vbits8 )
{
   Bool  ok      = True;
   UChar vabits2 = get_vabits2(a);

   // Convert the in-memory format to in-register format.
   if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
   else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
   else if ( VA_BITS2_NOACCESS  == vabits2 ) {
      *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
      ok = False;
   } else {
      tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
      *vbits8 = get_sec_vbits8(a);
   }
   return ok;
}
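
/* Usage sketch (not part of the build): reading the definedness of one
   client byte via the functions above.  The address 'a' is hypothetical. */
#if 0
   UChar vbits8;
   if (get_vbits8(a, &vbits8)) {
      /* addressable: vbits8 is 0x00 (all defined), 0xFF (all undefined),
         or a mixture taken from the sec-V-bits table */
   } else {
      /* unaddressable: vbits8 has been forced to V_BITS8_DEFINED (0x00) */
   }
#endif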
/* --------------- Secondary V bit table ------------ */

// This table holds the full V bit pattern for partially-defined bytes
// (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
// memory.
//
// Note: the nodes in this table can become stale.  Eg. if you write a PDB,
// then overwrite the same address with a fully defined byte, the sec-V-bit
// node will not necessarily be removed.  This is because checking for
// whether removal is necessary would slow down the fast paths.
//
// To avoid the stale nodes building up too much, we periodically (once the
// table reaches a certain size) garbage collect (GC) the table by
// traversing it and evicting any nodes not having a PDB.
// If more than a certain proportion of nodes survived, we increase the
// table size so that GCs occur less often.
//
// This policy is designed to avoid bad table bloat in the worst case where
// a program creates huge numbers of stale PDBs -- we would get this bloat
// if we had no GC -- while handling well the case where a node becomes
// stale but shortly afterwards is rewritten with a PDB and so becomes
// non-stale again (which happens quite often, eg. in perf/bz2).  If we just
// remove all stale nodes as soon as possible, we end up re-adding a lot of
// them later again.  The "sufficiently stale" approach avoids this.  (If a
// program has many live PDBs, performance will just suck, there's no way
// around that.)
//
// Further comments, JRS 14 Feb 2012.  It turns out that the policy of
// holding on to stale entries for 2 GCs before discarding them can lead
// to massive space leaks.  So we're changing to an arrangement where
// lines are evicted as soon as they are observed to be stale during a
// GC.  This also has a side benefit of allowing the sufficiently_stale
// field to be removed from the SecVBitNode struct, reducing its size by
// 8 bytes, which is a substantial space saving considering that the
// struct was previously 32 or so bytes, on a 64 bit target.
//
// In order to try and mitigate the problem that the "sufficiently stale"
// heuristic was designed to avoid, the table size is allowed to drift
// up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
// means that nodes will exist in the table longer on average, and hopefully
// will be deleted and re-added less frequently.
//
// The previous scaling up mechanism (now called STEPUP) is retained:
// if residency exceeds 50%, the table is scaled up, although by a
// factor sqrt(2) rather than 2 as before.  This effectively doubles the
// frequency of GCs when there are many PDBs and reduces the tendency of
// stale PDBs to reside for long periods in the table.

static OSet* secVBitTable;

// Stats
static ULong sec_vbits_new_nodes = 0;
static ULong sec_vbits_updates   = 0;

// This must be a power of two;  this is checked in mc_pre_clo_init().
// The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
// a larger address range) they take more space but we can get multiple
// partially-defined bytes in one if they are close to each other, reducing
// the number of total nodes.  In practice sometimes they are clustered (eg.
// perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
// row), but often not.  So we choose something intermediate.
#define BYTES_PER_SEC_VBIT_NODE     16

// We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
// more than this many nodes survive a GC.
#define STEPUP_SURVIVOR_PROPORTION  0.5
#define STEPUP_GROWTH_FACTOR        1.414213562

// If the above heuristic doesn't apply, then we may make the table
// slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
// this many nodes survive a GC, _and_ the total table size does
// not exceed a fixed limit.  The numbers are somewhat arbitrary, but
// work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
// effectively, although gradually, reduces residency and increases time
// between GCs for programs with small numbers of PDBs.  The 80000 limit
// effectively limits the table size to around 2MB for programs with
// small numbers of PDBs, whilst giving a reasonably long lifetime to
// entries, to try and reduce the costs resulting from deleting and
// re-adding of entries.
#define DRIFTUP_SURVIVOR_PROPORTION 0.15
#define DRIFTUP_GROWTH_FACTOR       1.015
#define DRIFTUP_MAX_SIZE            80000

// We GC the table when it gets this many nodes in it, ie. it's effectively
// the table size.  It can change.
static Int  secVBitLimit = 1000;
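
/* Worked numbers (a sketch, not part of the build): with the default
   secVBitLimit of 1000 declared just above, a GC leaving more than 500
   survivors (50%) steps the limit up to 1414, while more than 150
   survivors (15%) drifts it up to 1015, provided the limit is still
   below DRIFTUP_MAX_SIZE.  The actual logic lives in gcSecVBitTable()
   below. */
#if 0
   Int n_survivors = /* result of a GC pass */ 0;
   if ((Double)n_survivors > (Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)
      secVBitLimit = (Int)((Double)secVBitLimit
                           * (Double)STEPUP_GROWTH_FACTOR);   // 1000 -> 1414
   else if (secVBitLimit < DRIFTUP_MAX_SIZE
            && (Double)n_survivors
               > (Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)
      secVBitLimit = (Int)((Double)secVBitLimit
                           * (Double)DRIFTUP_GROWTH_FACTOR);  // 1000 -> 1015
#endif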
// The number of GCs done, used to age sec-V-bit nodes for eviction.
// Because it's unsigned, wrapping doesn't matter -- the right answer will
// come out anyway.
static UInt GCs_done = 0;

typedef
   struct {
      Addr  a;
      UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
   }
   SecVBitNode;

static OSet* createSecVBitTable(void)
{
   OSet* newSecVBitTable;
   newSecVBitTable = VG_(OSetGen_Create_With_Pool)
      ( offsetof(SecVBitNode, a),
        NULL, // use fast comparisons
        VG_(malloc), "mc.cSVT.1 (sec VBit table)",
        VG_(free),
        1000,
        sizeof(SecVBitNode));
   return newSecVBitTable;
}
static void gcSecVBitTable(void)
{
   OSet*        secVBitTable2;
   SecVBitNode* n;
   Int          i, n_nodes = 0, n_survivors = 0;

   GCs_done++;

   // Create the new table.
   secVBitTable2 = createSecVBitTable();

   // Traverse the table, moving fresh nodes into the new table.
   VG_(OSetGen_ResetIter)(secVBitTable);
   while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
      // Keep node if any of its bytes are non-stale.  Using
      // get_vabits2() for the lookup is not very efficient, but I don't
      // think it matters.
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
            // Found a non-stale byte, so keep =>
            // Insert a copy of the node into the new table.
            SecVBitNode* n2 =
               VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
            *n2 = *n;
            VG_(OSetGen_Insert)(secVBitTable2, n2);
            break;
         }
      }
   }

   // Get the before and after sizes.
   n_nodes     = VG_(OSetGen_Size)(secVBitTable);
   n_survivors = VG_(OSetGen_Size)(secVBitTable2);

   // Destroy the old table, and put the new one in its place.
   VG_(OSetGen_Destroy)(secVBitTable);
   secVBitTable = secVBitTable2;

   if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
      VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
                   n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
   }

   // Increase table size if necessary.
   if ((Double)n_survivors
       > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
      secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg,
                      "memcheck GC: %d new table size (stepup)\n",
                      secVBitLimit);
   }
   else
   if (secVBitLimit < DRIFTUP_MAX_SIZE
       && (Double)n_survivors
          > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
      secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
      if (VG_(clo_verbosity) > 1)
         VG_(message)(Vg_DebugMsg,
                      "memcheck GC: %d new table size (driftup)\n",
                      secVBitLimit);
   }
}
static UWord get_sec_vbits8(Addr a)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   UChar        vbits8;
   tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   vbits8 = n->vbits8[amod];
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   return vbits8;
}

static void set_sec_vbits8(Addr a, UWord vbits8)
{
   Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   // make it to the secondary V bits table.
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   if (n) {
      n->vbits8[amod] = vbits8;     // update
      sec_vbits_updates++;
   } else {
      // Do a table GC if necessary.  Nb: do this before creating and
      // inserting the new node, to avoid erroneously GC'ing the new node.
      if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
         gcSecVBitTable();
      }

      // New node:  assign the specific byte, make the rest invalid (they
      // should never be read as-is, but be cautious).
      n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
      n->a = aAligned;
      for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
         n->vbits8[i] = V_BITS8_UNDEFINED;
      }
      n->vbits8[amod] = vbits8;

      // Insert the new node.
      VG_(OSetGen_Insert)(secVBitTable, n);
      sec_vbits_new_nodes++;

      n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
      if (n_secVBit_nodes > max_secVBit_nodes)
         max_secVBit_nodes = n_secVBit_nodes;
   }
}
/* --------------- Endianness helpers --------------- */

/* Returns the offset in memory of the byteno-th least significant byte
   in a wordszB-sized word, given the specified endianness.  (Byte number
   0 is the least significant byte.) */
static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
                                    UWord byteno ) {
   return bigendian ? (wordszB-1-byteno) : byteno;
}
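
/* Quick check (a sketch, not part of the build): for a 4-byte word, byte
   number 0 (the least significant byte) lives at memory offset 0 on a
   little-endian host and at offset 3 on a big-endian host. */
#if 0
   tl_assert(byte_offset_w(4, False/*little*/, 0) == 0);
   tl_assert(byte_offset_w(4, True /*big*/,    0) == 3);
   tl_assert(byte_offset_w(4, True /*big*/,    3) == 0);
#endif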
/* --------------- Ignored address ranges --------------- */

/* Denotes the address-error-reportability status for address ranges:
   IAR_NotIgnored:  the usual case -- report errors in this range
   IAR_CommandLine: don't report errors -- from command line setting
   IAR_ClientReq:   don't report errors -- from client request
*/
typedef
   enum { IAR_INVALID=99,
          IAR_NotIgnored,
          IAR_CommandLine,
          IAR_ClientReq }
   IARKind;

static const HChar* showIARKind ( IARKind iark )
{
   switch (iark) {
      case IAR_INVALID:     return "INVALID";
      case IAR_NotIgnored:  return "NotIgnored";
      case IAR_CommandLine: return "CommandLine";
      case IAR_ClientReq:   return "ClientReq";
      default:              return "???";
   }
}

// RangeMap<IARKind>
static RangeMap* gIgnoredAddressRanges = NULL;

static void init_gIgnoredAddressRanges ( void )
{
   if (LIKELY(gIgnoredAddressRanges != NULL))
      return;
   gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
                                             VG_(free), IAR_NotIgnored );
}

Bool MC_(in_ignored_range) ( Addr a )
{
   if (LIKELY(gIgnoredAddressRanges == NULL))
      return False;
   UWord how     = IAR_INVALID;
   UWord key_min = ~(UWord)0;
   UWord key_max =  (UWord)0;
   VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
   tl_assert(key_min <= a && a <= key_max);
   switch (how) {
      case IAR_NotIgnored:  return False;
      case IAR_CommandLine: return True;
      case IAR_ClientReq:   return True;
      default: break; /* invalid */
   }
   VG_(tool_panic)("MC_(in_ignore_range)");
   /*NOTREACHED*/
}
Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB )
{
   if (LIKELY(!MC_(clo_ignore_range_below_sp)))
      return False;
   tl_assert(szB >= 1 && szB <= 32);
   tl_assert(MC_(clo_ignore_range_below_sp__first_offset)
             > MC_(clo_ignore_range_below_sp__last_offset));
   Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset);
   Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset);
   if (range_lo >= range_hi) {
      /* Bizarre.  We have a wraparound situation.  What should we do? */
      return False; // Play safe
   } else {
      /* This is the expected case. */
      if (range_lo <= a && a + szB - 1 <= range_hi)
         return True;
      else
         return False;
   }
   /*NOTREACHED*/
   tl_assert(0);
}
/* Parse two Addrs (in hex) separated by a dash, or fail. */

static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 )
{
   Bool ok = VG_(parse_Addr) (ppc, result1);
   if (!ok)
      return False;
   if (**ppc != '-')
      return False;
   (*ppc)++;
   ok = VG_(parse_Addr) (ppc, result2);
   if (!ok)
      return False;
   return True;
}

/* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
   or fail. */

static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 )
{
   Bool ok = VG_(parse_UInt) (ppc, result1);
   if (!ok)
      return False;
   if (**ppc != '-')
      return False;
   (*ppc)++;
   ok = VG_(parse_UInt) (ppc, result2);
   if (!ok)
      return False;
   return True;
}
/* Parse a set of ranges separated by commas into 'ignoreRanges', or
   fail.  If they are valid, add them to the global set of ignored
   ranges. */
static Bool parse_ignore_ranges ( const HChar* str0 )
{
   init_gIgnoredAddressRanges();
   const HChar*  str = str0;
   const HChar** ppc = &str;
   while (1) {
      Addr start = ~(Addr)0;
      Addr end   = (Addr)0;
      Bool ok    = parse_Addr_pair(ppc, &start, &end);
      if (!ok)
         return False;
      if (start > end)
         return False;
      VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
      if (**ppc == 0)
         return True;
      if (**ppc != ',')
         return False;
      (*ppc)++;
   }
   /*NOTREACHED*/
   return False;
}
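
/* Usage sketch (not part of the build): parse_ignore_ranges() above
   accepts hex address pairs joined by '-' and separated by ',', in the
   same style as the --ignore-ranges option.  The addresses here are
   made up. */
#if 0
   Bool ok = parse_ignore_ranges("0x10000000-0x10ffffff,0x20000000-0x2000ffff");
   tl_assert(ok);
#endif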
/* Add or remove [start, +len) from the set of ignored ranges. */
static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
{
   init_gIgnoredAddressRanges();
   const Bool verbose = (VG_(clo_verbosity) > 1);
   if (len == 0) {
      return False;
   }
   if (addRange) {
      VG_(bindRangeMap)(gIgnoredAddressRanges,
                        start, start+len-1, IAR_ClientReq);
      if (verbose)
         VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
                   (void*)start, (void*)(start+len-1));
   } else {
      VG_(bindRangeMap)(gIgnoredAddressRanges,
                        start, start+len-1, IAR_NotIgnored);
      if (verbose)
         VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
                   (void*)start, (void*)(start+len-1));
   }
   if (verbose) {
      VG_(dmsg)("memcheck:   now have %u ranges:\n",
                VG_(sizeRangeMap)(gIgnoredAddressRanges));
      UInt i;
      for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
         UWord val     = IAR_INVALID;
         UWord key_min = ~(UWord)0;
         UWord key_max = (UWord)0;
         VG_(indexRangeMap)( &key_min, &key_max, &val,
                             gIgnoredAddressRanges, i );
         VG_(dmsg)("memcheck:      [%u]  %016lx-%016lx  %s\n",
                   i, key_min, key_max, showIARKind(val));
      }
   }
   return True;
}
/* --------------- Load/store slow cases. --------------- */

static
__attribute__((noinline))
void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
                                Addr a, SizeT nBits, Bool bigendian )
{
   ULong  pessim[4];     /* only used when p-l-ok=yes */
   SSizeT szB            = nBits / 8;
   SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
   SSizeT i, j;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   /* Code below assumes load size is a power of two and at least 64
      bits. */
   tl_assert((szB & (szB-1)) == 0 && szL > 0);

   /* If this triggers, you probably just need to increase the size of
      the pessim array. */
   tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));

   for (j = 0; j < szL; j++) {
      pessim[j] = V_BITS64_DEFINED;
      res[j] = V_BITS64_UNDEFINED;
   }

   /* Make up a result V word, which contains the loaded data for
      valid addresses and Defined for invalid addresses.  Iterate over
      the bytes in the word, from the most significant down to the
      least.  The vbits to return are calculated into vbits128.  Also
      compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from the pessim array) but is used as a
      cross-check. */
   for (j = szL-1; j >= 0; j--) {
      ULong vbits64    = V_BITS64_UNDEFINED;
      ULong pessim64   = V_BITS64_DEFINED;
      UWord long_index = byte_offset_w(szL, bigendian, j);
      for (i = 8-1; i >= 0; i--) {
         PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
         ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
         ok = get_vbits8(ai, &vbits8);
         vbits64 <<= 8;
         vbits64 |= vbits8;
         if (!ok) n_addrs_bad++;
         pessim64 <<= 8;
         pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
      }
      res[long_index] = vbits64;
      pessim[long_index] = pessim64;
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen).  This is allowed by the memory model of
      modern machines, since an aligned load cannot span two pages and
      thus cannot "partially fault".

      Therefore, a load from a partially-addressable place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's an aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address.
   */

   /* "at least one of the addresses is invalid" */
   ok = False;
   for (j = 0; j < szL; j++)
      ok |= pessim[j] != V_BITS64_DEFINED;
   tl_assert(ok);

#  if defined(VGP_s390x_linux)
   tl_assert(szB == 16); // s390 doesn't have > 128 bit SIMD
   /* OK if all loaded bytes are from the same page. */
   Bool alignedOK = ((a & 0xfff) <= 0x1000 - szB);
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* lxvd2x might generate an unaligned 128 bit vector load. */
   Bool alignedOK = (szB == 16);
#  else
   /* OK if the address is aligned by the load size. */
   Bool alignedOK = (0 == (a & (szB - 1)));
#  endif

   if (alignedOK && n_addrs_bad < szB) {
      /* Exemption applies.  Use the previously computed pessimising
         value and return the combined result, but don't flag an
         addressing error.  The pessimising value is Defined for valid
         addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
      for (j = szL-1; j >= 0; j--)
         res[j] |= pessim[j];
      return;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
}
MC_MAIN_STATIC
__attribute__((noinline))
__attribute__((used))
VG_REGPARM(3)
ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian );

MC_MAIN_STATIC
__attribute__((noinline))
__attribute__((used))
VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
                 this function may get called from hand written assembly. */
ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
{
   PROF_EVENT(MCPE_LOADVN_SLOW);

   /* ------------ BEGIN semi-fast cases ------------ */
   /* These deal quickly-ish with the common auxiliary primary map
      cases on 64-bit platforms.  Are merely a speedup hack; can be
      omitted without loss of correctness/functionality.  Note that in
      both cases the "sizeof(void*) == 8" causes these cases to be
      folded out by compilers on 32-bit platforms.  These are derived
      from LOADV64 and LOADV32.
   */

#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (LIKELY(sizeof(void*) == 4 && nBits == 64 && VG_IS_8_ALIGNED(a)))
#  else
   if (LIKELY(sizeof(void*) == 8 && nBits == 64 && VG_IS_8_ALIGNED(a)))
#  endif
   {
      SecMap* sm       = get_secmap_for_reading(a);
      UWord   sm_off16 = SM_OFF_16(a);
      UWord   vabits16 = sm->vabits16[sm_off16];
      if (LIKELY(vabits16 == VA_BITS16_DEFINED))
         return V_BITS64_DEFINED;
      if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
         return V_BITS64_UNDEFINED;
      /* else fall into the slow case */
   }

#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (LIKELY(sizeof(void*) == 4 && nBits == 32 && VG_IS_4_ALIGNED(a)))
#  else
   if (LIKELY(sizeof(void*) == 8 && nBits == 32 && VG_IS_4_ALIGNED(a)))
#  endif
   {
      SecMap* sm      = get_secmap_for_reading(a);
      UWord   sm_off  = SM_OFF(a);
      UWord   vabits8 = sm->vabits8[sm_off];
      if (LIKELY(vabits8 == VA_BITS8_DEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
      if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
         return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
      /* else fall into slow case */
   }

   /* ------------ END semi-fast cases ------------ */

   ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   SSizeT szB         = nBits / 8;
   SSizeT i;          /* Must be signed. */
   SizeT  n_addrs_bad = 0;
   Addr   ai;
   UChar  vbits8;
   Bool   ok;

   tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);

   /* Make up a 64-bit result V word, which contains the loaded data
      for valid addresses and Defined for invalid addresses.  Iterate
      over the bytes in the word, from the most significant down to
      the least.  The vbits to return are calculated into vbits64.
      Also compute the pessimising value to be used when
      --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
      info can be gleaned from pessim64) but is used as a
      cross-check. */
   for (i = szB-1; i >= 0; i--) {
      PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
      ai = a + byte_offset_w(szB, bigendian, i);
      ok = get_vbits8(ai, &vbits8);
      vbits64 <<= 8;
      vbits64 |= vbits8;
      if (!ok) n_addrs_bad++;
      pessim64 <<= 8;
      pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   }

   /* In the common case, all the addresses involved are valid, so we
      just return the computed V bits and have done. */
   if (LIKELY(n_addrs_bad == 0))
      return vbits64;

   /* If there's no possibility of getting a partial-loads-ok
      exemption, report the error and quit. */
   if (!MC_(clo_partial_loads_ok)) {
      MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
      return vbits64;
   }

   /* The partial-loads-ok exemption might apply.  Find out if it
      does.  If so, don't report an addressing error, but do return
      Undefined for the bytes that are out of range, so as to avoid
      false negatives.  If it doesn't apply, just report an addressing
      error in the usual way. */

   /* Some code steps along byte strings in aligned word-sized chunks
      even when there is only a partially defined word at the end (eg,
      optimised strlen), despite such behaviour being declared undefined
      by ANSI C/C++.  This is allowed by the memory model of modern
      machines, since an aligned load cannot span two pages and thus
      cannot "partially fault".

      Therefore, a load from a partially-addressable place is allowed
      if all of the following hold:
      - the command-line flag is set [by default, it isn't]
      - it's a word-sized, word-aligned load
      - at least one of the addresses in the word *is* valid

      Since this suppresses the addressing error, we avoid false
      negatives by marking bytes undefined when they come from an
      invalid address.
   */

   /* "at least one of the addresses is invalid" */
   tl_assert(pessim64 != V_BITS64_DEFINED);

#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (szB == VG_WORDSIZE * 2 && VG_IS_WORD_ALIGNED(a)
       && n_addrs_bad < VG_WORDSIZE * 2)
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* On power unaligned loads of words are OK. */
   if (szB == VG_WORDSIZE && n_addrs_bad < VG_WORDSIZE)
#  else
   if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
       && n_addrs_bad < VG_WORDSIZE)
#  endif
   {
      /* Exemption applies.  Use the previously computed pessimising
         value for vbits64 and return the combined result, but don't
         flag an addressing error.  The pessimising value is Defined
         for valid addresses and Undefined for invalid addresses. */
      /* for assumption that doing bitwise or implements UifU */
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits64 UifU= pessim64  (is pessimised by it, iow) */
      vbits64 |= pessim64;
      return vbits64;
   }

   /* Also, it appears that gcc generates string-stepping code in
      32-bit chunks on 64 bit platforms.  So, also grant an exception
      for this case.  Note that the first clause of the conditional
      (VG_WORDSIZE == 8) is known at compile time, so the whole clause
      will get folded out in 32 bit builds. */
#  if defined(VGA_mips64) && defined(VGABI_N32)
   if (VG_WORDSIZE == 4
       && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4)
#  else
   if (VG_WORDSIZE == 8
       && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4)
#  endif
   {
      tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
      /* (really need "UifU" here...)
         vbits64 UifU= pessim64  (is pessimised by it, iow) */
      vbits64 |= pessim64;
      /* Mark the upper 32 bits as undefined, just to be on the safe
         side. */
      vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
      return vbits64;
   }

   /* Exemption doesn't apply.  Flag an addressing error in the normal
      way. */
   MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );

   return vbits64;
}
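
/* Worked example (a sketch, not part of the build): the "pessimising"
   merge used above, for a 4-byte aligned load on a little-endian host
   where the first three bytes are addressable and defined but the last
   byte is unaddressable.  The loaded V bits claim "all defined" and the
   pessimising value marks the bad byte undefined; OR-ing them (UifU)
   leaves that byte undefined, so any later use of it is still reported. */
#if 0
   ULong example_vbits  = 0x00000000;              /* 0 bits == defined  */
   ULong example_pessim = 0xFF000000;              /* bad byte undefined */
   ULong example_result = example_vbits | example_pessim;  /* 0xFF000000 */
#endif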
1568 static
1569 __attribute__((noinline))
1570 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1572 SizeT szB = nBits / 8;
1573 SizeT i, n_addrs_bad = 0;
1574 UChar vbits8;
1575 Addr ai;
1576 Bool ok;
1578 PROF_EVENT(MCPE_STOREVN_SLOW);
1580 /* ------------ BEGIN semi-fast cases ------------ */
1581 /* These deal quickly-ish with the common auxiliary primary map
1582 cases on 64-bit platforms. Are merely a speedup hack; can be
1583 omitted without loss of correctness/functionality. Note that in
1584 both cases the "sizeof(void*) == 8" causes these cases to be
1585 folded out by compilers on 32-bit platforms. The logic below
1586 is somewhat similar to some cases extensively commented in
1587 MC_(helperc_STOREV8).
1589 # if defined(VGA_mips64) && defined(VGABI_N32)
1590 if (LIKELY(sizeof(void*) == 4 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1591 # else
1592 if (LIKELY(sizeof(void*) == 8 && nBits == 64 && VG_IS_8_ALIGNED(a)))
1593 # endif
1595 SecMap* sm = get_secmap_for_reading(a);
1596 UWord sm_off16 = SM_OFF_16(a);
1597 UWord vabits16 = sm->vabits16[sm_off16];
1598 if (LIKELY( !is_distinguished_sm(sm) &&
1599 (VA_BITS16_DEFINED == vabits16 ||
1600 VA_BITS16_UNDEFINED == vabits16) )) {
1601 /* Handle common case quickly: a is suitably aligned, */
1602 /* is mapped, and is addressible. */
1603 // Convert full V-bits in register to compact 2-bit form.
1604 if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1605 sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
1606 return;
1607 } else if (V_BITS64_UNDEFINED == vbytes) {
1608 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
1609 return;
1611 /* else fall into the slow case */
1613 /* else fall into the slow case */
1616 # if defined(VGA_mips64) && defined(VGABI_N32)
1617 if (LIKELY(sizeof(void*) == 4 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1618 # else
1619 if (LIKELY(sizeof(void*) == 8 && nBits == 32 && VG_IS_4_ALIGNED(a)))
1620 # endif
1622 SecMap* sm = get_secmap_for_reading(a);
1623 UWord sm_off = SM_OFF(a);
1624 UWord vabits8 = sm->vabits8[sm_off];
1625 if (LIKELY( !is_distinguished_sm(sm) &&
1626 (VA_BITS8_DEFINED == vabits8 ||
1627 VA_BITS8_UNDEFINED == vabits8) )) {
1628 /* Handle common case quickly: a is suitably aligned, */
1629 /* is mapped, and is addressable. */
1630 // Convert full V-bits in register to compact 2-bit form.
1631 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1632 sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1633 return;
1634 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1635 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1636 return;
1638 /* else fall into the slow case */
1640 /* else fall into the slow case */
1642 /* ------------ END semi-fast cases ------------ */
1644 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1646 /* Dump vbytes in memory, iterating from least to most significant
1647 byte. At the same time establish addressability of the location. */
1648 for (i = 0; i < szB; i++) {
1649 PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
1650 ai = a + byte_offset_w(szB, bigendian, i);
1651 vbits8 = vbytes & 0xff;
1652 ok = set_vbits8(ai, vbits8);
1653 if (!ok) n_addrs_bad++;
1654 vbytes >>= 8;
1657 /* If an address error has happened, report it. */
1658 if (n_addrs_bad > 0)
1659 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1663 /*------------------------------------------------------------*/
1664 /*--- Setting permissions over address ranges. ---*/
1665 /*------------------------------------------------------------*/
1667 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1668 UWord dsm_num )
1670 UWord sm_off, sm_off16;
1671 UWord vabits2 = vabits16 & 0x3;
1672 SizeT lenA, lenB, len_to_next_secmap;
1673 Addr aNext;
1674 SecMap* sm;
1675 SecMap** sm_ptr;
1676 SecMap* example_dsm;
1678 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);
1680 /* Check the V+A bits make sense. */
1681 tl_assert(VA_BITS16_NOACCESS == vabits16 ||
1682 VA_BITS16_UNDEFINED == vabits16 ||
1683 VA_BITS16_DEFINED == vabits16);
1685 // This code should never write PDBs; ensure this. (See comment above
1686 // set_vabits2().)
1687 tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1689 if (lenT == 0)
1690 return;
1692 if (lenT > 256 * 1024 * 1024) {
1693 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1694 const HChar* s = "unknown???";
1695 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1696 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1697 if (vabits16 == VA_BITS16_DEFINED ) s = "defined";
1698 VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1699 "large range [0x%lx, 0x%lx) (%s)\n",
1700 a, a + lenT, s);
1704 #ifndef PERF_FAST_SARP
1705 /*------------------ debug-only case ------------------ */
1707 // Endianness doesn't matter here because all bytes are being set to
1708 // the same value.
1709 // Nb: We don't have to worry about updating the sec-V-bits table
1710 // after these set_vabits2() calls because this code never writes
1711 // VA_BITS2_PARTDEFINED values.
1712 SizeT i;
1713 for (i = 0; i < lenT; i++) {
1714 set_vabits2(a + i, vabits2);
1716 return;
1718 #endif
1720 /*------------------ standard handling ------------------ */
1722 /* Get the distinguished secondary that we might want
1723 to use (part of the space-compression scheme). */
1724 example_dsm = &sm_distinguished[dsm_num];
1726 // We have to handle ranges covering various combinations of partial and
1727 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1728 // Cases marked with a '*' are common.
1730 // TYPE PARTS USED
1731 // ---- ----------
1732 // * one partial sec-map (p) 1
1733 // - one whole sec-map (P) 2
1735 // * two partial sec-maps (pp) 1,3
1736 // - one partial, one whole sec-map (pP) 1,2
1737 // - one whole, one partial sec-map (Pp) 2,3
1738 // - two whole sec-maps (PP) 2,2
1740 // * one partial, one whole, one partial (pPp) 1,2,3
1741 // - one partial, two whole (pPP) 1,2,2
1742 // - two whole, one partial (PPp) 2,2,3
1743 // - three whole (PPP) 2,2,2
1745 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1746 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1747 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1748 // - N whole (PP...PP) 2,2...2,2
1750 // Break up total length (lenT) into two parts: length in the first
1751 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
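// A worked example (figures chosen purely for illustration, taking
// SM_SIZE to be the 64KB suggested by the 64KB stepping in Part 2):
// with a == 0x1003F000 and lenT == 8192, aNext is 0x10040000, so
// lenA == 4096 (the tail of the first sec-map, handled by Part 1) and
// lenB == 4096, which is < SM_SIZE and so is finished off by Part 3.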
1752 aNext = start_of_this_sm(a) + SM_SIZE;
1753 len_to_next_secmap = aNext - a;
1754 if ( lenT <= len_to_next_secmap ) {
1755 // Range entirely within one sec-map. Covers almost all cases.
1756 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
1757 lenA = lenT;
1758 lenB = 0;
1759 } else if (is_start_of_sm(a)) {
1760 // Range spans at least one whole sec-map, and starts at the beginning
1761 // of a sec-map; skip to Part 2.
1762 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
1763 lenA = 0;
1764 lenB = lenT;
1765 goto part2;
1766 } else {
1767 // Range spans two or more sec-maps, first one is partial.
1768 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
1769 lenA = len_to_next_secmap;
1770 lenB = lenT - lenA;
1773 //------------------------------------------------------------------------
1774 // Part 1: Deal with the first sec_map. Most of the time the range will be
1775 // entirely within a sec_map and this part alone will suffice. Also,
1776 // doing it this way lets us avoid repeatedly testing for the crossing of
1777 // a sec-map boundary within these loops.
1778 //------------------------------------------------------------------------
1780 // If it's distinguished, make it undistinguished if necessary.
1781 sm_ptr = get_secmap_ptr(a);
1782 if (is_distinguished_sm(*sm_ptr)) {
1783 if (*sm_ptr == example_dsm) {
1784 // Sec-map already has the V+A bits that we want, so skip.
1785 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
1786 a = aNext;
1787 lenA = 0;
1788 } else {
1789 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
1790 *sm_ptr = copy_for_writing(*sm_ptr);
1793 sm = *sm_ptr;
1795 // 1 byte steps
1796 while (True) {
1797 if (VG_IS_8_ALIGNED(a)) break;
1798 if (lenA < 1) break;
1799 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
1800 sm_off = SM_OFF(a);
1801 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1802 a += 1;
1803 lenA -= 1;
1805 // 8-aligned, 8 byte steps
1806 while (True) {
1807 if (lenA < 8) break;
1808 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
1809 sm_off16 = SM_OFF_16(a);
1810 sm->vabits16[sm_off16] = vabits16;
1811 a += 8;
1812 lenA -= 8;
1814 // 1 byte steps
1815 while (True) {
1816 if (lenA < 1) break;
1817 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
1818 sm_off = SM_OFF(a);
1819 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1820 a += 1;
1821 lenA -= 1;
1824 // We've finished the first sec-map. Is that it?
1825 if (lenB == 0)
1826 return;
1828 //------------------------------------------------------------------------
1829 // Part 2: Fast-set entire sec-maps at a time.
1830 //------------------------------------------------------------------------
1831 part2:
1832 // 64KB-aligned, 64KB steps.
1833 // Nb: we can reach here with lenB < SM_SIZE
1834 tl_assert(0 == lenA);
1835 while (True) {
1836 if (lenB < SM_SIZE) break;
1837 tl_assert(is_start_of_sm(a));
1838 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
1839 sm_ptr = get_secmap_ptr(a);
1840 if (!is_distinguished_sm(*sm_ptr)) {
1841 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
1842 // Free the non-distinguished sec-map that we're replacing. This
1843 // case happens moderately often, enough to be worthwhile.
1844 SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1845 tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1847 update_SM_counts(*sm_ptr, example_dsm);
1848 // Make the sec-map entry point to the example DSM
1849 *sm_ptr = example_dsm;
1850 lenB -= SM_SIZE;
1851 a += SM_SIZE;
1854 // We've finished the whole sec-maps. Is that it?
1855 if (lenB == 0)
1856 return;
1858 //------------------------------------------------------------------------
1859 // Part 3: Finish off the final partial sec-map, if necessary.
1860 //------------------------------------------------------------------------
1862 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1864 // If it's distinguished, make it undistinguished if necessary.
1865 sm_ptr = get_secmap_ptr(a);
1866 if (is_distinguished_sm(*sm_ptr)) {
1867 if (*sm_ptr == example_dsm) {
1868 // Sec-map already has the V+A bits that we want, so stop.
1869 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
1870 return;
1871 } else {
1872 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
1873 *sm_ptr = copy_for_writing(*sm_ptr);
1876 sm = *sm_ptr;
1878 // 8-aligned, 8 byte steps
1879 while (True) {
1880 if (lenB < 8) break;
1881 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
1882 sm_off16 = SM_OFF_16(a);
1883 sm->vabits16[sm_off16] = vabits16;
1884 a += 8;
1885 lenB -= 8;
1887 // 1 byte steps
1888 while (True) {
1889 if (lenB < 1) return;
1890 PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
1891 sm_off = SM_OFF(a);
1892 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1893 a += 1;
1894 lenB -= 1;
1899 /* --- Set permissions for arbitrary address ranges --- */
1901 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1903 PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
1904 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1905 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1906 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1907 ocache_sarp_Clear_Origins ( a, len );
1910 static void make_mem_undefined ( Addr a, SizeT len )
1912 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
1913 DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1914 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1917 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1919 PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
1920 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1921 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1922 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1923 ocache_sarp_Set_Origins ( a, len, otag );
1926 static
1927 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1928 ThreadId tid, UInt okind )
1930 UInt ecu;
1931 ExeContext* here;
1932 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1933 if it is invalid. So no need to do it here. */
1934 tl_assert(okind <= 3);
1935 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1936 tl_assert(here);
1937 ecu = VG_(get_ECU_from_ExeContext)(here);
1938 tl_assert(VG_(is_plausible_ECU)(ecu));
1939 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1942 static
1943 void mc_new_mem_w_tid_make_ECU ( Addr a, SizeT len, ThreadId tid )
1945 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1948 static
1949 void mc_new_mem_w_tid_no_ECU ( Addr a, SizeT len, ThreadId tid )
1951 MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1954 void MC_(make_mem_defined) ( Addr a, SizeT len )
1956 PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
1957 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1958 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1959 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1960 ocache_sarp_Clear_Origins ( a, len );
1963 __attribute__((unused))
1964 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
1966 MC_(make_mem_defined)(a, len);
1969 /* For each byte in [a,a+len), if the byte is addressable, make it be
1970 defined, but if it isn't addressable, leave it alone. In other
1971 words, a version of MC_(make_mem_defined) that doesn't mess with
1972 addressability. Low-performance implementation. */
1973 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1975 SizeT i;
1976 UChar vabits2;
1977 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1978 for (i = 0; i < len; i++) {
1979 vabits2 = get_vabits2( a+i );
1980 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1981 set_vabits2(a+i, VA_BITS2_DEFINED);
1982 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1983 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1989 /* Similarly (needed for mprotect handling ..) */
1990 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1992 SizeT i;
1993 UChar vabits2;
1994 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1995 for (i = 0; i < len; i++) {
1996 vabits2 = get_vabits2( a+i );
1997 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1998 set_vabits2(a+i, VA_BITS2_DEFINED);
1999 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
2000 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
2006 /* --- Block-copy permissions (needed for implementing realloc() and
2007 sys_mremap). --- */
2009 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
2011 SizeT i, j;
2012 UChar vabits2, vabits8;
2013 Bool aligned, nooverlap;
2015 DEBUG("MC_(copy_address_range_state)\n");
2016 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);
2018 if (len == 0 || src == dst)
2019 return;
2021 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
2022 nooverlap = src+len <= dst || dst+len <= src;
2024 if (nooverlap && aligned) {
2026 /* Vectorised fast case, when no overlap and suitably aligned */
2027 /* vector loop */
2028 i = 0;
2029 while (len >= 4) {
2030 vabits8 = get_vabits8_for_aligned_word32( src+i );
2031 set_vabits8_for_aligned_word32( dst+i, vabits8 );
2032 if (LIKELY(VA_BITS8_DEFINED == vabits8
2033 || VA_BITS8_UNDEFINED == vabits8
2034 || VA_BITS8_NOACCESS == vabits8)) {
2035 /* do nothing */
2036 } else {
2037 /* have to copy secondary map info */
2038 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
2039 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
2040 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
2041 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
2042 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
2043 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
2044 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
2045 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
2047 i += 4;
2048 len -= 4;
2050 /* fixup loop */
2051 while (len >= 1) {
2052 vabits2 = get_vabits2( src+i );
2053 set_vabits2( dst+i, vabits2 );
2054 if (VA_BITS2_PARTDEFINED == vabits2) {
2055 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
2057 i++;
2058 len--;
2061 } else {
2063 /* We have to do things the slow way */
2064 if (src < dst) {
2065 for (i = 0, j = len-1; i < len; i++, j--) {
2066 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
2067 vabits2 = get_vabits2( src+j );
2068 set_vabits2( dst+j, vabits2 );
2069 if (VA_BITS2_PARTDEFINED == vabits2) {
2070 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
2075 if (src > dst) {
2076 for (i = 0; i < len; i++) {
2077 PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
2078 vabits2 = get_vabits2( src+i );
2079 set_vabits2( dst+i, vabits2 );
2080 if (VA_BITS2_PARTDEFINED == vabits2) {
2081 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
2090 /*------------------------------------------------------------*/
2091 /*--- Origin tracking stuff - cache basics ---*/
2092 /*------------------------------------------------------------*/
2094 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
2095 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2097 Note that this implementation draws inspiration from the "origin
2098 tracking by value piggybacking" scheme described in "Tracking Bad
2099 Apples: Reporting the Origin of Null and Undefined Value Errors"
2100 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
2101 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
2102 implemented completely differently.
2104 Origin tags and ECUs -- about the shadow values
2105 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2107 This implementation tracks the defining point of all uninitialised
2108 values using so called "origin tags", which are 32-bit integers,
2109 rather than using the values themselves to encode the origins. The
2110 latter, so-called "value piggybacking", is what the OOPSLA07 paper
2111 describes.
2113 Origin tags, as tracked by the machinery below, are 32-bit unsigned
2114 ints (UInts), regardless of the machine's word size. Each tag
2115 comprises an upper 30-bit ECU field and a lower 2-bit
2116 'kind' field. The ECU field is a number given out by m_execontext
2117 and has a 1-1 mapping with ExeContext*s. An ECU can be used
2118 directly as an origin tag (otag), but in fact we use the
2119 additional 'kind' field to indicate roughly where the
2120 tag came from. This helps print more understandable error messages
2121 for the user -- it has no other purpose. In summary:
2123 * Both ECUs and origin tags are represented as 32-bit words
2125 * m_execontext and the core-tool interface deal purely in ECUs.
2126 They have no knowledge of origin tags - that is a purely
2127 Memcheck-internal matter.
2129 * all valid ECUs have the lowest 2 bits zero and at least
2130 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2132 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2133 constants defined in mc_include.h.
2135 * to convert an otag back to an ECU, AND it with ~3
2137 One important fact is that no valid otag is zero. A zero otag is
2138 used by the implementation to indicate "no origin", which could
2139 mean that either the value is defined, or it is undefined but the
2140 implementation somehow managed to lose the origin.
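   As a small sketch of the conversions (using VG_(get_ECU_from_ExeContext)
   and the MC_OKIND_ constants from mc_include.h, as used elsewhere in
   this file; ec stands for some ExeContext*):

      UInt ecu  = VG_(get_ECU_from_ExeContext)(ec);  // lowest 2 bits zero
      UInt otag = ecu | MC_OKIND_STACK;              // attach a 'kind'
      UInt back = otag & ~3u;                        // recover the ECU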
2142 The ECU used for memory created by malloc etc is derived from the
2143 stack trace at the time the malloc etc happens. This means the
2144 mechanism can show the exact allocation point for heap-created
2145 uninitialised values.
2147 In contrast, it is simply too expensive to create a complete
2148 backtrace for each stack allocation. Therefore we merely use a
2149 depth-1 backtrace for stack allocations, which can be done once at
2150 translation time, rather than N times at run time. The result of
2151 this is that, for stack created uninitialised values, Memcheck can
2152 only show the allocating function, and not what called it.
2153 Furthermore, compilers tend to move the stack pointer just once at
2154 the start of the function, to allocate all locals, and so in fact
2155 the stack origin almost always simply points to the opening brace
2156 of the function. Net result is, for stack origins, the mechanism
2157 can tell you in which function the undefined value was created, but
2158 that's all. Users will need to carefully check all locals in the
2159 specified function.
2161 Shadowing registers and memory
2162 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2164 Memory is shadowed using a two level cache structure (ocacheL1 and
2165 ocacheL2). Memory references are first directed to ocacheL1. This
2166 is a traditional 2-way set associative cache with 32-byte lines and
2167 approximate LRU replacement within each set.
2169 A naive implementation would require storing one 32 bit otag for
2170 each byte of memory covered, a 4:1 space overhead. Instead, there
2171 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2172 that shows which of the 4 bytes have that shadow value and which
2173 have a shadow value of zero (indicating no origin). Hence a lot of
2174 space is saved, but the cost is that only one different origin per
2175 4 bytes of address space can be represented. This is a source of
2176 imprecision, but how much of a problem it really is remains to be
2177 seen.
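   As a sketch of the encoding (the w32[] and descr[] fields belong to
   the OCacheLine structure defined further below):

      line->u.main.w32[i]   = otag;  // one otag per aligned 32-bit word
      line->u.main.descr[i] = 0xF;   // all 4 bytes of that word carry it

   whereas descr[i] == 0 means that none of the 4 bytes has an origin,
   regardless of what w32[i] happens to hold.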
2179 A cache line that contains all zeroes ("no origins") contains no
2180 useful information, and can be ejected from the L1 cache "for
2181 free", in the sense that a read miss on the L1 causes a line of
2182 zeroes to be installed. However, ejecting a line containing
2183 nonzeroes risks losing origin information permanently. In order to
2184 prevent such lossage, ejected nonzero lines are placed in a
2185 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2186 lines. This can grow arbitrarily large, and so should ensure that
2187 Memcheck runs out of memory in preference to losing useful origin
2188 info due to cache size limitations.
2190 Shadowing registers is a bit tricky, because the shadow values are
2191 32 bits, regardless of the size of the register. That gives a
2192 problem for registers smaller than 32 bits. The solution is to
2193 find spaces in the guest state that are unused, and use those to
2194 shadow guest state fragments smaller than 32 bits. For example, on
2195 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
2196 shadow are allocated for the register's otag, then there are still
2197 12 bytes left over which could be used to shadow 3 other values.
2199 This implies there is some non-obvious mapping from guest state
2200 (start,length) pairs to the relevant shadow offset (for the origin
2201 tags). And it is unfortunately guest-architecture specific. The
2202 mapping is contained in mc_machine.c, which is quite lengthy but
2203 straightforward.
2205 Instrumenting the IR
2206 ~~~~~~~~~~~~~~~~~~~~
2208 Instrumentation is largely straightforward, and done by the
2209 functions schemeE and schemeS in mc_translate.c. These generate
2210 code for handling the origin tags of expressions (E) and statements
2211 (S) respectively. The rather strange names are a reference to the
2212 "compilation schemes" shown in Simon Peyton Jones' book "The
2213 Implementation of Functional Programming Languages" (Prentice Hall,
2214 1987, see
2215 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2217 schemeS merely arranges to move shadow values around the guest
2218 state to track the incoming IR. schemeE is largely trivial too.
2219 The only significant point is how to compute the otag corresponding
2220 to binary (or ternary, quaternary, etc) operator applications. The
2221 rule is simple: just take whichever value is larger (32-bit
2222 unsigned max). Constants get the special value zero. Hence this
2223 rule always propagates a nonzero (known) otag in preference to a
2224 zero (unknown, or more likely, value-is-defined) tag, as we want.
2225 If two different undefined values are inputs to a binary operator
2226 application, then which is propagated is arbitrary, but that
2227 doesn't matter, since the program is erroneous in using either of
2228 the values, and so there's no point in attempting to propagate
2229 both.
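   In effect, for a binary application op(x,y), the origin computation
   that schemeE emits is (as a sketch)

      otag(op(x,y)) = Max32U( otag(x), otag(y) )

   with any constant operand contributing an otag of zero.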
2231 Since constants are abstracted to (otag) zero, much of the
2232 instrumentation code can be folded out without difficulty by the
2233 generic post-instrumentation IR cleanup pass, using these rules:
2234 Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y are
2235 constants is evaluated at JIT time, together with the resulting dead
2236 code removal. In practice this causes surprisingly few Max32Us to
2237 survive through to backend code generation.
2239 Integration with the V-bits machinery
2240 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2242 This is again largely straightforward. Mostly the otag and V bits
2243 stuff are independent. The only point of interaction is when the V
2244 bits instrumenter creates a call to a helper function to report an
2245 uninitialised value error -- in that case it must first use schemeE
2246 to get hold of the origin tag expression for the value, and pass
2247 that to the helper too.
2249 There is the usual stuff to do with setting address range
2250 permissions. When memory is painted undefined, we must also know
2251 the origin tag to paint with, which involves some tedious plumbing,
2252 particularly to do with the fast case stack handlers. When memory
2253 is painted defined or noaccess then the origin tags must be forced
2254 to zero.
2256 One of the goals of the implementation was to ensure that the
2257 non-origin tracking mode isn't slowed down at all. To do this,
2258 various functions to do with memory permissions setting (again,
2259 mostly pertaining to the stack) are duplicated for the with- and
2260 without-otag case.
2262 Dealing with stack redzones, and the NIA cache
2263 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2265 This is one of the few non-obvious parts of the implementation.
2267 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2268 reserved area below the stack pointer, that can be used as scratch
2269 space by compiler generated code for functions. In the Memcheck
2270 sources this is referred to as the "stack redzone". The important
2271 thing here is that such redzones are considered volatile across
2272 function calls and returns. So Memcheck takes care to mark them as
2273 undefined for each call and return, on the afflicted platforms.
2274 Past experience shows this is essential in order to get reliable
2275 messages about uninitialised values that come from the stack.
2277 So the question is, when we paint a redzone undefined, what origin
2278 tag should we use for it? Consider a function f() calling g(). If
2279 we paint the redzone using an otag derived from the ExeContext of
2280 the CALL/BL instruction in f, then any errors in g causing it to
2281 use uninitialised values that happen to lie in the redzone, will be
2282 reported as having their origin in f. Which is highly confusing.
2284 The same applies for returns: if, on a return, we paint the redzone
2285 using an origin tag derived from the ExeContext of the RET/BLR
2286 instruction in g, then any later errors in f causing it to use
2287 uninitialised values in the redzone, will be reported as having
2288 their origin in g. Which is just as confusing.
2290 To do it right, in both cases we need to use an origin tag which
2291 pertains to the instruction which dynamically follows the CALL/BL
2292 or RET/BLR. In short, one derived from the NIA - the "next
2293 instruction address".
2295 To make this work, Memcheck's redzone-painting helper,
2296 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2297 NIA. It converts the NIA to a 1-element ExeContext, and uses that
2298 ExeContext's ECU as the basis for the otag used to paint the
2299 redzone. The expensive part of this is converting an NIA into an
2300 ECU, since this happens once for every call and every return. So
2301 we use a simple 511-line, 2-way set associative cache
2302 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2303 the cost out.
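   Schematically, the redzone-painting path is therefore (a sketch of
   the mechanism just described, not the literal code):

      ecu = lookup of nia in nia_to_ecu_cache;
      if (miss) {
         ecu = ECU of a 1-element ExeContext built from nia;
         install the (nia, ecu) pair in nia_to_ecu_cache;
      }
      paint the redzone undefined, with an otag derived from ecu.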
2305 Further background comments
2306 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2308 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
2309 > it really just the address of the relevant ExeContext?
2311 Well, it's not the address, but a value which has a 1-1 mapping
2312 with ExeContexts, and is guaranteed not to be zero, since zero
2313 denotes (to memcheck) "unknown origin or defined value". So these
2314 UInts are just numbers starting at 4 and incrementing by 4; each
2315 ExeContext is given a number when it is created. (*** NOTE this
2316 confuses otags and ECUs; see comments above ***).
2318 Making these otags 32-bit regardless of the machine's word size
2319 makes the 64-bit implementation easier (next para). And it doesn't
2320 really limit us in any way, since for the tags to overflow would
2321 require that the program somehow caused 2^30-1 different
2322 ExeContexts to be created, in which case it is probably in deep
2323 trouble. Not to mention V will have soaked up many tens of
2324 gigabytes of memory merely to store them all.
2326 So having 64-bit origins doesn't really buy you anything, and has
2327 the following downsides:
2329 Suppose that instead, an otag is a UWord. This would mean that, on
2330 a 64-bit target,
2332 1. It becomes hard to shadow any element of guest state which is
2333 smaller than 8 bytes. To do so means you'd need to find some
2334 8-byte-sized hole in the guest state which you don't want to
2335 shadow, and use that instead to hold the otag. On ppc64, the
2336 condition code register(s) are split into 20 UChar sized pieces,
2337 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2338 and so that would entail finding 160 bytes somewhere else in the
2339 guest state.
2341 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2342 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2343 same) and so I had to look for 4 untracked otag-sized areas in
2344 the guest state to make that possible.
2346 The same problem exists of course when origin tags are only 32
2347 bits, but it's less extreme.
2349 2. (More compelling) it doubles the size of the origin shadow
2350 memory. Given that the shadow memory is organised as a fixed
2351 size cache, and that accuracy of tracking is limited by origins
2352 falling out of the cache due to space conflicts, this isn't good.
2354 > Another question: is the origin tracking perfect, or are there
2355 > cases where it fails to determine an origin?
2357 It is imperfect for at least the following reasons, and
2358 probably more:
2360 * Insufficient capacity in the origin cache. When a line is
2361 evicted from the cache it is gone forever, and so subsequent
2362 queries for the line produce zero, indicating no origin
2363 information. Interestingly, a line containing all zeroes can be
2364 evicted "free" from the cache, since it contains no useful
2365 information, so there is scope perhaps for some cleverer cache
2366 management schemes. (*** NOTE, with the introduction of the
2367 second level origin tag cache, ocacheL2, this is no longer a
2368 problem. ***)
2370 * The origin cache only stores one otag per 32-bits of address
2371 space, plus 4 bits indicating which of the 4 bytes has that tag
2372 and which are considered defined. The result is that if two
2373 undefined bytes in the same word are stored in memory, the first
2374 stored byte's origin will be lost and replaced by the origin for
2375 the second byte.
2377 * Nonzero origin tags for defined values. Consider a binary
2378 operator application op(x,y). Suppose y is undefined (and so has
2379 a valid nonzero origin tag), and x is defined, but erroneously
2380 has a nonzero origin tag (defined values should have tag zero).
2381 If the erroneous tag has a numeric value greater than y's tag,
2382 then the rule for propagating origin tags through binary
2383 operations, which is simply to take the unsigned max of the two
2384 tags, will erroneously propagate x's tag rather than y's.
2386 * Some obscure uses of x86/amd64 byte registers can cause lossage
2387 or confusion of origins. %AH .. %DH are treated as different
2388 from, and unrelated to, their parent registers, %EAX .. %EDX.
2389 So some weird sequences like
2391 movb undefined-value, %AH
2392 movb defined-value, %AL
2393 .. use %AX or %EAX ..
2395 will cause the origin attributed to %AH to be ignored, since %AL,
2396 %AX, %EAX are treated as the same register, and %AH as a
2397 completely separate one.
2399 But having said all that, it actually seems to work fairly well in
2400 practice.
2403 static UWord stats_ocacheL1_find = 0;
2404 static UWord stats_ocacheL1_found_at_1 = 0;
2405 static UWord stats_ocacheL1_found_at_N = 0;
2406 static UWord stats_ocacheL1_misses = 0;
2407 static UWord stats_ocacheL1_lossage = 0;
2408 static UWord stats_ocacheL1_movefwds = 0;
2410 static UWord stats__ocacheL2_finds = 0;
2411 static UWord stats__ocacheL2_adds = 0;
2412 static UWord stats__ocacheL2_dels = 0;
2413 static UWord stats__ocacheL2_misses = 0;
2414 static UWord stats__ocacheL2_n_nodes_max = 0;
2416 /* Cache of 32-bit values, one every 32 bits of address space */
2418 #define OC_BITS_PER_LINE 5
2419 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2421 static INLINE UWord oc_line_offset ( Addr a ) {
2422 return (a >> 2) & (OC_W32S_PER_LINE - 1);
2424 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2425 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2428 #define OC_LINES_PER_SET 2
2430 #define OC_N_SET_BITS 20
2431 #define OC_N_SETS (1 << OC_N_SET_BITS)
2433 /* These settings give:
2434 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2435 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
2438 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2441 /* Originally (pre Dec 2021) it was the case that this code had a
2442 parameterizable cache line size, set by changing OC_BITS_PER_LINE.
2443 However, as a result of the speedup fixes necessitated by bug 446103, that
2444 is no longer really the case, and much of the L1 and L2 cache code has been
2445 tuned specifically for the case OC_BITS_PER_LINE == 5 (that is, the line
2446 size is 32 bytes). Changing that would require a bunch of re-tuning
2447 effort. So let's set it in stone for now. */
2448 STATIC_ASSERT(OC_BITS_PER_LINE == 5);
2449 STATIC_ASSERT(OC_LINES_PER_SET == 2);
2451 /* Fundamentally we want an OCacheLine structure (see below) as follows:
2452 struct {
2453 Addr tag;
2454 UInt w32 [OC_W32S_PER_LINE];
2455 UChar descr[OC_W32S_PER_LINE];
2457 However, in various places, we want to set the w32[] and descr[] arrays to
2458 zero, or check if they are zero. This can be a very hot path (per bug
2459 446103). So, instead, we have a union which is either those two arrays
2460 (OCacheLine_Main) or simply an array of ULongs (OCacheLine_W64s). For the
2461 set-zero/test-zero operations, the OCacheLine_W64s are used.
2464 // To ensure that OCacheLine.descr[] will fit in an integral number of ULongs.
2465 STATIC_ASSERT(0 == (OC_W32S_PER_LINE % 8));
2467 #define OC_W64S_PER_MAIN /* "MAIN" meaning "struct OCacheLine_Main" */ \
2468 (OC_W32S_PER_LINE / 2 /* covers OCacheLine_Main.w32[] */ \
2469 + OC_W32S_PER_LINE / 8) /* covers OCacheLine_Main.descr[] */
2470 STATIC_ASSERT(OC_W64S_PER_MAIN == 5);
2472 typedef
2473 ULong OCacheLine_W64s[OC_W64S_PER_MAIN];
2475 typedef
2476 struct {
2477 UInt w32 [OC_W32S_PER_LINE];
2478 UChar descr[OC_W32S_PER_LINE];
2480 OCacheLine_Main;
2482 STATIC_ASSERT(sizeof(OCacheLine_W64s) == sizeof(OCacheLine_Main));
2484 typedef
2485 struct {
2486 Addr tag;
2487 union {
2488 OCacheLine_W64s w64s;
2489 OCacheLine_Main main;
2490 } u;
2492 OCacheLine;
2494 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2495 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2496 and 'z' if all the represented tags are zero. */
2497 static inline UChar classify_OCacheLine ( OCacheLine* line )
2499 UWord i;
2500 if (line->tag == 1/*invalid*/)
2501 return 'e'; /* EMPTY */
2502 tl_assert(is_valid_oc_tag(line->tag));
2504 // BEGIN fast special-case of the test loop below. This will detect
2505 // zero-ness (case 'z') for a subset of cases that the loop below will,
2506 // hence is safe.
2507 if (OC_W64S_PER_MAIN == 5) {
2508 if (line->u.w64s[0] == 0
2509 && line->u.w64s[1] == 0 && line->u.w64s[2] == 0
2510 && line->u.w64s[3] == 0 && line->u.w64s[4] == 0) {
2511 return 'z';
2513 } else {
2514 tl_assert2(0, "unsupported line size (classify_OCacheLine)");
2516 // END fast special-case of the test loop below.
2518 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2519 tl_assert(0 == ((~0xF) & line->u.main.descr[i]));
2520 if (line->u.main.w32[i] > 0 && line->u.main.descr[i] > 0)
2521 return 'n'; /* NONZERO - contains useful info */
2523 return 'z'; /* ZERO - no useful info */
2526 typedef
2527 struct {
2528 OCacheLine line[OC_LINES_PER_SET];
2530 OCacheSet;
2532 typedef
2533 struct {
2534 OCacheSet set[OC_N_SETS];
2536 OCache;
2538 static OCache* ocacheL1 = NULL;
2539 static UWord ocacheL1_event_ctr = 0;
2541 static void init_ocacheL2 ( void ); /* fwds */
2542 static void init_OCache ( void )
2544 UWord line, set;
2545 tl_assert(MC_(clo_mc_level) >= 3);
2546 tl_assert(ocacheL1 == NULL);
2547 SysRes sres = VG_(am_shadow_alloc)(sizeof(OCache));
2548 if (sr_isError(sres)) {
2549 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2550 sizeof(OCache), sr_Err(sres) );
2552 ocacheL1 = (void *)(Addr)sr_Res(sres);
2553 tl_assert(ocacheL1 != NULL);
2554 for (set = 0; set < OC_N_SETS; set++) {
2555 for (line = 0; line < OC_LINES_PER_SET; line++) {
2556 ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2559 init_ocacheL2();
2562 static inline void moveLineForwards ( OCacheSet* set, UWord lineno )
2564 OCacheLine tmp;
2565 stats_ocacheL1_movefwds++;
2566 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2567 tmp = set->line[lineno-1];
2568 set->line[lineno-1] = set->line[lineno];
2569 set->line[lineno] = tmp;
2572 static inline void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2573 UWord i;
2574 if (OC_W32S_PER_LINE == 8) {
2575 // BEGIN fast special-case of the loop below
2576 tl_assert(OC_W64S_PER_MAIN == 5);
2577 line->u.w64s[0] = 0;
2578 line->u.w64s[1] = 0;
2579 line->u.w64s[2] = 0;
2580 line->u.w64s[3] = 0;
2581 line->u.w64s[4] = 0;
2582 // END fast special-case of the loop below
2583 } else {
2584 tl_assert2(0, "unsupported line size (zeroise_OCacheLine)");
2585 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2586 line->u.main.w32[i] = 0; /* NO ORIGIN */
2587 line->u.main.descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2590 line->tag = tag;
2593 //////////////////////////////////////////////////////////////
2594 //// OCache backing store
2596 // The backing store for ocacheL1 is, conceptually, an AVL tree of lines that
2597 // got ejected from the L1 (a "victim cache"), and which actually contain
2598 // useful info -- that is, for which classify_OCacheLine would return 'n' and
2599 // no other value. However, the tree can grow large, and searching/updating
2600 // it can be hot paths. Hence we "take out" 12 significant bits of the key by
2601 // having 4096 trees, and select one using HASH_OCACHE_TAG.
2603 // What that hash function returns isn't important so long as it is a pure
2604 // function of the tag values, and is < 4096. However, it is critical for
2605 // performance of long SARPs. Hence the extra shift of 11 bits. This means
2606 // each tree conceptually is assigned to contiguous sequences of 2048 lines in
2607 // the "line address space", giving some locality of reference when scanning
2608 // linearly through address space, as is done by a SARP. Changing that 11 to
2609 // 0 gives terrible performance on long SARPs, presumably because each new
2610 // line is in a different tree, hence we wind up thrashing the (CPU's) caches.
2612 // On 32-bit targets, we have to be a bit careful not to shift out so many
2613 // bits that not all 2^12 trees get used. That leads to the constraint
2614 // (OC_BITS_PER_LINE + 11 + 12) < 32. Note that the 11 is the only thing we
2615 // can change here. In this case we have OC_BITS_PER_LINE == 5, hence the
2616 // inequality is (28 < 32) and so we're good.
2618 // The value 11 was determined empirically from various Firefox runs. 10 or
2619 // 12 also work pretty well.
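// Concretely, with OC_BITS_PER_LINE == 5 the shift below is 16, so
// HASH_OCACHE_TAG(tag) is (tag >> 16) & 0xFFF: every 64KB stretch of
// address space (2048 lines of 32 bytes) selects the same tree.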
2621 static OSet* ocachesL2[4096];
2623 STATIC_ASSERT((OC_BITS_PER_LINE + 11 + 12) < 32);
2624 static inline UInt HASH_OCACHE_TAG ( Addr tag ) {
2625 return (UInt)((tag >> (OC_BITS_PER_LINE + 11)) & 0xFFF);
2628 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2629 return VG_(malloc)(cc, szB);
2631 static void ocacheL2_free ( void* v ) {
2632 VG_(free)( v );
2635 /* Stats: # nodes currently in tree */
2636 static UWord stats__ocacheL2_n_nodes = 0;
2638 static void init_ocacheL2 ( void )
2640 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2641 tl_assert(0 == offsetof(OCacheLine,tag));
2642 for (UInt i = 0; i < 4096; i++) {
2643 tl_assert(!ocachesL2[i]);
2644 ocachesL2[i]
2645 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2646 NULL, /* fast cmp */
2647 ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2649 stats__ocacheL2_n_nodes = 0;
2652 /* Find line with the given tag in the tree, or NULL if not found. */
2653 static inline OCacheLine* ocacheL2_find_tag ( Addr tag )
2655 OCacheLine* line;
2656 tl_assert(is_valid_oc_tag(tag));
2657 stats__ocacheL2_finds++;
2658 OSet* oset = ocachesL2[HASH_OCACHE_TAG(tag)];
2659 line = VG_(OSetGen_Lookup)( oset, &tag );
2660 return line;
2663 /* Delete the line with the given tag from the tree, if it is present, and
2664 free up the associated memory. */
2665 static void ocacheL2_del_tag ( Addr tag )
2667 OCacheLine* line;
2668 tl_assert(is_valid_oc_tag(tag));
2669 stats__ocacheL2_dels++;
2670 OSet* oset = ocachesL2[HASH_OCACHE_TAG(tag)];
2671 line = VG_(OSetGen_Remove)( oset, &tag );
2672 if (line) {
2673 VG_(OSetGen_FreeNode)(oset, line);
2674 tl_assert(stats__ocacheL2_n_nodes > 0);
2675 stats__ocacheL2_n_nodes--;
2679 /* Add a copy of the given line to the tree. It must not already be
2680 present. */
2681 static void ocacheL2_add_line ( OCacheLine* line )
2683 OCacheLine* copy;
2684 tl_assert(is_valid_oc_tag(line->tag));
2685 OSet* oset = ocachesL2[HASH_OCACHE_TAG(line->tag)];
2686 copy = VG_(OSetGen_AllocNode)( oset, sizeof(OCacheLine) );
2687 *copy = *line;
2688 stats__ocacheL2_adds++;
2689 VG_(OSetGen_Insert)( oset, copy );
2690 stats__ocacheL2_n_nodes++;
2691 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2692 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2695 ////
2696 //////////////////////////////////////////////////////////////
2698 __attribute__((noinline))
2699 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2701 OCacheLine *victim, *inL2;
2702 UChar c;
2703 UWord line;
2704 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2705 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2706 UWord tag = a & tagmask;
2707 tl_assert(setno < OC_N_SETS);
2709 /* we already tried line == 0; skip therefore. */
2710 for (line = 1; line < OC_LINES_PER_SET; line++) {
2711 if (ocacheL1->set[setno].line[line].tag == tag) {
2712 switch (line) {
2713 // with OC_LINES_PER_SET equal to 2 this is the only possible case
2714 case 1:
2715 stats_ocacheL1_found_at_1++;
2716 break;
2717 #if OC_LINES_PER_SET > 2
2718 default:
2719 stats_ocacheL1_found_at_N++;
2720 break;
2721 #endif
2723 if (UNLIKELY(0 == (ocacheL1_event_ctr++
2724 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2725 moveLineForwards( &ocacheL1->set[setno], line );
2726 line--;
2728 return &ocacheL1->set[setno].line[line];
2732 /* A miss. Use the last slot. Implicitly this means we're
2733 ejecting the line in the last slot. */
2734 stats_ocacheL1_misses++;
2735 tl_assert(line == OC_LINES_PER_SET);
2736 line--;
2737 tl_assert(line > 0);
2739 /* First, move the to-be-ejected line to the L2 cache. */
2740 victim = &ocacheL1->set[setno].line[line];
2741 c = classify_OCacheLine(victim);
2742 switch (c) {
2743 case 'e':
2744 /* the line is empty (has invalid tag); ignore it. */
2745 break;
2746 case 'z':
2747 /* line contains zeroes. We must ensure the backing store is
2748 updated accordingly, either by copying the line there
2749 verbatim, or by ensuring it isn't present there. We
2750 choose the latter on the basis that it reduces the size of
2751 the backing store. */
2752 ocacheL2_del_tag( victim->tag );
2753 break;
2754 case 'n':
2755 /* line contains at least one real, useful origin. Copy it
2756 to the backing store. */
2757 stats_ocacheL1_lossage++;
2758 inL2 = ocacheL2_find_tag( victim->tag );
2759 if (inL2) {
2760 *inL2 = *victim;
2761 } else {
2762 ocacheL2_add_line( victim );
2764 break;
2765 default:
2766 tl_assert(0);
2769 /* Now we must reload the L1 cache from the backing tree, if
2770 possible. */
2771 tl_assert(tag != victim->tag); /* stay sane */
2772 inL2 = ocacheL2_find_tag( tag );
2773 if (inL2) {
2774 /* We're in luck. It's in the L2. */
2775 ocacheL1->set[setno].line[line] = *inL2;
2776 } else {
2777 /* Missed at both levels of the cache hierarchy. We have to
2778 declare it as full of zeroes (unknown origins). */
2779 stats__ocacheL2_misses++;
2780 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2783 /* Move it one forwards */
2784 moveLineForwards( &ocacheL1->set[setno], line );
2785 line--;
2787 return &ocacheL1->set[setno].line[line];
2790 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2792 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2793 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2794 UWord tag = a & tagmask;
2796 stats_ocacheL1_find++;
2798 if (OC_ENABLE_ASSERTIONS) {
2799 tl_assert(setno >= 0 && setno < OC_N_SETS);
2800 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2803 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2804 return &ocacheL1->set[setno].line[0];
2807 return find_OCacheLine_SLOW( a );
2810 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2812 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2813 //// Set the origins for a+0 .. a+7
2814 { OCacheLine* line;
2815 UWord lineoff = oc_line_offset(a);
2816 if (OC_ENABLE_ASSERTIONS) {
2817 tl_assert(lineoff >= 0
2818 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2820 line = find_OCacheLine( a );
2821 line->u.main.descr[lineoff+0] = 0xF;
2822 line->u.main.descr[lineoff+1] = 0xF;
2823 line->u.main.w32[lineoff+0] = otag;
2824 line->u.main.w32[lineoff+1] = otag;
2826 //// END inlined, specialised version of MC_(helperc_b_store8)
2830 /*------------------------------------------------------------*/
2831 /*--- Aligned fast case permission setters, ---*/
2832 /*--- for dealing with stacks ---*/
2833 /*------------------------------------------------------------*/
2835 /*--------------------- 32-bit ---------------------*/
2837 /* Nb: by "aligned" here we mean 4-byte aligned */
2839 static INLINE void make_aligned_word32_undefined ( Addr a )
2841 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);
2843 #ifndef PERF_FAST_STACK2
2844 make_mem_undefined(a, 4);
2845 #else
2847 UWord sm_off;
2848 SecMap* sm;
2850 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2851 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
2852 make_mem_undefined(a, 4);
2853 return;
2856 sm = get_secmap_for_writing_low(a);
2857 sm_off = SM_OFF(a);
2858 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2860 #endif
2863 static INLINE
2864 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2866 make_aligned_word32_undefined(a);
2867 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2868 //// Set the origins for a+0 .. a+3
2869 { OCacheLine* line;
2870 UWord lineoff = oc_line_offset(a);
2871 if (OC_ENABLE_ASSERTIONS) {
2872 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2874 line = find_OCacheLine( a );
2875 line->u.main.descr[lineoff] = 0xF;
2876 line->u.main.w32[lineoff] = otag;
2878 //// END inlined, specialised version of MC_(helperc_b_store4)
2881 static INLINE
2882 void make_aligned_word32_noaccess ( Addr a )
2884 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);
2886 #ifndef PERF_FAST_STACK2
2887 MC_(make_mem_noaccess)(a, 4);
2888 #else
2890 UWord sm_off;
2891 SecMap* sm;
2893 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2894 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
2895 MC_(make_mem_noaccess)(a, 4);
2896 return;
2899 sm = get_secmap_for_writing_low(a);
2900 sm_off = SM_OFF(a);
2901 sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2903 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2904 //// Set the origins for a+0 .. a+3.
2905 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2906 OCacheLine* line;
2907 UWord lineoff = oc_line_offset(a);
2908 if (OC_ENABLE_ASSERTIONS) {
2909 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2911 line = find_OCacheLine( a );
2912 line->u.main.descr[lineoff] = 0;
2914 //// END inlined, specialised version of MC_(helperc_b_store4)
2916 #endif
2919 /*--------------------- 64-bit ---------------------*/
2921 /* Nb: by "aligned" here we mean 8-byte aligned */
2923 static INLINE void make_aligned_word64_undefined ( Addr a )
2925 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);
2927 #ifndef PERF_FAST_STACK2
2928 make_mem_undefined(a, 8);
2929 #else
2931 UWord sm_off16;
2932 SecMap* sm;
2934 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2935 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
2936 make_mem_undefined(a, 8);
2937 return;
2940 sm = get_secmap_for_writing_low(a);
2941 sm_off16 = SM_OFF_16(a);
2942 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
2944 #endif
2947 static INLINE
2948 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2950 make_aligned_word64_undefined(a);
2951 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2952 //// Set the origins for a+0 .. a+7
2953 { OCacheLine* line;
2954 UWord lineoff = oc_line_offset(a);
2955 tl_assert(lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2956 line = find_OCacheLine( a );
2957 line->u.main.descr[lineoff+0] = 0xF;
2958 line->u.main.descr[lineoff+1] = 0xF;
2959 line->u.main.w32[lineoff+0] = otag;
2960 line->u.main.w32[lineoff+1] = otag;
2962 //// END inlined, specialised version of MC_(helperc_b_store8)
2965 static INLINE
2966 void make_aligned_word64_noaccess ( Addr a )
2968 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);
2970 #ifndef PERF_FAST_STACK2
2971 MC_(make_mem_noaccess)(a, 8);
2972 #else
2974 UWord sm_off16;
2975 SecMap* sm;
2977 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2978 PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
2979 MC_(make_mem_noaccess)(a, 8);
2980 return;
2983 sm = get_secmap_for_writing_low(a);
2984 sm_off16 = SM_OFF_16(a);
2985 sm->vabits16[sm_off16] = VA_BITS16_NOACCESS;
2987 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2988 //// Clear the origins for a+0 .. a+7.
2989 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2990 OCacheLine* line;
2991 UWord lineoff = oc_line_offset(a);
2992 tl_assert(lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2993 line = find_OCacheLine( a );
2994 line->u.main.descr[lineoff+0] = 0;
2995 line->u.main.descr[lineoff+1] = 0;
2997 //// END inlined, specialised version of MC_(helperc_b_store8)
2999 #endif
3003 /*------------------------------------------------------------*/
3004 /*--- Stack pointer adjustment ---*/
3005 /*------------------------------------------------------------*/
3007 #ifdef PERF_FAST_STACK
3008 # define MAYBE_USED
3009 #else
3010 # define MAYBE_USED __attribute__((unused))
3011 #endif
3013 /*--------------- adjustment by 4 bytes ---------------*/
3015 MAYBE_USED
3016 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
3018 UInt otag = ecu | MC_OKIND_STACK;
3019 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
3020 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3021 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3022 } else {
3023 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
3027 MAYBE_USED
3028 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
3030 PROF_EVENT(MCPE_NEW_MEM_STACK_4);
3031 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3032 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3033 } else {
3034 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
3038 MAYBE_USED
3039 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
3041 PROF_EVENT(MCPE_DIE_MEM_STACK_4);
3042 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3043 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3044 } else {
3045 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
3049 /*--------------- adjustment by 8 bytes ---------------*/
3051 MAYBE_USED
3052 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
3054 UInt otag = ecu | MC_OKIND_STACK;
3055 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
3056 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3057 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3058 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3059 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3060 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
3061 } else {
3062 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
3066 MAYBE_USED
3067 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
3069 PROF_EVENT(MCPE_NEW_MEM_STACK_8);
3070 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3071 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3072 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3073 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3074 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3075 } else {
3076 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
3080 MAYBE_USED
3081 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
3083 PROF_EVENT(MCPE_DIE_MEM_STACK_8);
3084 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3085 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3086 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3087 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3088 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3089 } else {
3090 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
3094 /*--------------- adjustment by 12 bytes ---------------*/
3096 MAYBE_USED
3097 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
3099 UInt otag = ecu | MC_OKIND_STACK;
3100 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
3101 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3102 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3103 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3104 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3105 /* from previous test we don't have 8-alignment at offset +0,
3106 hence must have 8 alignment at offsets +4/-4. Hence safe to
3107 do 4 at +0 and then 8 at +4/. */
3108 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3109 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
3110 } else {
3111 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
3115 MAYBE_USED
3116 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
3118 PROF_EVENT(MCPE_NEW_MEM_STACK_12);
3119 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3120 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3121 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3122 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3123 /* from previous test we don't have 8-alignment at offset +0,
3124 hence must have 8 alignment at offsets +4/-4. Hence safe to
3125 do 4 at +0 and then 8 at +4/. */
3126 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3127 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3128 } else {
3129 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
3133 MAYBE_USED
3134 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
3136 PROF_EVENT(MCPE_DIE_MEM_STACK_12);
3137 /* Note the -12 in the test */
3138 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
3139 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
3140 -4. */
3141 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3142 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3143 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3144 /* We have 4-alignment at +0, but we don't have 8-alignment at
3145 -12. So we must have 8-alignment at -8. Hence do 4 at -12
3146 and then 8 at -8. */
3147 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3148 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3149 } else {
3150 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
3154 /*--------------- adjustment by 16 bytes ---------------*/
3156 MAYBE_USED
3157 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
3159 UInt otag = ecu | MC_OKIND_STACK;
3160 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
3161 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3162 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3163 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3164 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3165 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3166 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3167 Hence do 4 at +0, 8 at +4, 4 at +12. */
3168 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3169 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3170 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3171 } else {
3172 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
3176 MAYBE_USED
3177 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
3179 PROF_EVENT(MCPE_NEW_MEM_STACK_16);
3180 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3181 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3182 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3183 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3184 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3185 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3186 Hence do 4 at +0, 8 at +4, 4 at +12. */
3187 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3188 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3189 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3190 } else {
3191 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
3195 MAYBE_USED
3196 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
3198 PROF_EVENT(MCPE_DIE_MEM_STACK_16);
3199 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3200 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
3201 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3202 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
3203 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3204 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
3205 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3206 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3207 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3208 } else {
3209 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
3213 /*--------------- adjustment by 32 bytes ---------------*/
3215 MAYBE_USED
3216 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
3218 UInt otag = ecu | MC_OKIND_STACK;
3219 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3220 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3221 /* Straightforward */
3222 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3223 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3224 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3225 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3226 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3227 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3228 +0,+28. */
3229 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3230 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3231 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3232 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
3233 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
3234 } else {
3235 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
3239 MAYBE_USED
3240 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
3242 PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3243 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3244 /* Straightforward */
3245 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3246 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3247 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3248 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3249 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3250 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
3251 +0,+28. */
3252 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3253 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3254 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3255 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3256 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3257 } else {
3258 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3262 MAYBE_USED
3263 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3265 PROF_EVENT(MCPE_DIE_MEM_STACK_32);
3266 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3267 /* Straightforward */
3268 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3269 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3270 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3271 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3272 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3273 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
3274 4 at -32,-4. */
3275 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3276 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3277 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3278 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3279 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
3280 } else {
3281 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3285 /*--------------- adjustment by 112 bytes ---------------*/
3287 MAYBE_USED
3288 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3290 UInt otag = ecu | MC_OKIND_STACK;
3291 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3292 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3293 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3294 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3295 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3296 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3297 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3298 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3299 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3300 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3301 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3302 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3303 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3304 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3305 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3306 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3307 } else {
3308 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3312 MAYBE_USED
3313 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3315 PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3316 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3317 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3318 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3319 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3320 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3321 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3322 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3323 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3324 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3325 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3326 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3327 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3328 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3329 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3330 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3331 } else {
3332 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3336 MAYBE_USED
3337 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3339 PROF_EVENT(MCPE_DIE_MEM_STACK_112);
3340 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3341 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3342 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3343 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3344 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3345 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3346 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3347 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3348 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3349 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3350 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3351 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3352 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3353 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3354 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3355 } else {
3356 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3360 /*--------------- adjustment by 128 bytes ---------------*/
3362 MAYBE_USED
3363 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3365 UInt otag = ecu | MC_OKIND_STACK;
3366 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3367 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3368 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
3369 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3370 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3371 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3372 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3373 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3374 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3375 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3376 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3377 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3378 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3379 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3380 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3381 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3382 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3383 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3384 } else {
3385 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3389 MAYBE_USED
3390 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3392 PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3393 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3394 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3395 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3396 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3397 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3398 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3399 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3400 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3401 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3402 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3403 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3404 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3405 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3406 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3407 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3408 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3409 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3410 } else {
3411 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3415 MAYBE_USED
3416 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3418 PROF_EVENT(MCPE_DIE_MEM_STACK_128);
3419 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3420 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3421 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3422 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3423 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3424 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3425 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3426 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3427 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3428 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3429 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3430 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3431 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3432 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3433 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3434 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3435 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3436 } else {
3437 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3441 /*--------------- adjustment by 144 bytes ---------------*/
3443 MAYBE_USED
3444 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3446 UInt otag = ecu | MC_OKIND_STACK;
3447 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3448 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3449 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3450 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3451 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3452 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3453 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3454 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3455 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3456 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3457 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3458 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3459 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3460 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3461 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3462 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3463 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3464 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3465 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3466 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3467 } else {
3468 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3472 MAYBE_USED
3473 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3475 PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3476 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3477 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3478 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3479 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3480 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3481 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3482 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3483 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3484 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3485 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3486 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3487 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3488 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3489 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3490 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3491 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3492 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3493 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3494 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3495 } else {
3496 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3500 MAYBE_USED
3501 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3503 PROF_EVENT(MCPE_DIE_MEM_STACK_144);
3504 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3505 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3506 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3507 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3508 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3509 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3510 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3511 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3512 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3513 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3514 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3515 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3516 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3517 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3518 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3519 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3520 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3521 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3522 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3523 } else {
3524 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3528 /*--------------- adjustment by 160 bytes ---------------*/
3530 MAYBE_USED
3531 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3533 UInt otag = ecu | MC_OKIND_STACK;
3534 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3535 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3536 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3537 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3538 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3539 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3540 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3541 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3542 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3543 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3544 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3545 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3546 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3547 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3548 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3549 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3550 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3551 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3552 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3553 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3554 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3555 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3556 } else {
3557 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3561 MAYBE_USED
3562 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3564 PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3565 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3566 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3567 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3568 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3569 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3570 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3571 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3572 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3573 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3574 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3575 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3576 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3577 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3578 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3579 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3580 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3581 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3582 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3583 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3584 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3585 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3586 } else {
3587 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3591 MAYBE_USED
3592 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3594 PROF_EVENT(MCPE_DIE_MEM_STACK_160);
3595 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3596 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3597 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3598 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3599 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3600 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3601 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3602 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3603 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3604 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3605 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3606 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3607 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3608 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3609 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3610 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3611 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3612 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3613 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3614 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3615 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3616 } else {
3617 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3621 /*--------------- adjustment by N bytes ---------------*/
3623 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3625 UInt otag = ecu | MC_OKIND_STACK;
3626 PROF_EVENT(MCPE_NEW_MEM_STACK);
3627 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3630 static void mc_new_mem_stack ( Addr a, SizeT len )
3632 PROF_EVENT(MCPE_NEW_MEM_STACK);
3633 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3636 static void mc_die_mem_stack ( Addr a, SizeT len )
3638 PROF_EVENT(MCPE_DIE_MEM_STACK);
3639 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3643 /* The AMD64 ABI says:
3645 "The 128-byte area beyond the location pointed to by %rsp is considered
3646 to be reserved and shall not be modified by signal or interrupt
3647 handlers. Therefore, functions may use this area for temporary data
3648 that is not needed across function calls. In particular, leaf functions
3649 may use this area for their entire stack frame, rather than adjusting
3650 the stack pointer in the prologue and epilogue. This area is known as
3651 red zone [sic]."
3653 So after any call or return we need to mark this redzone as containing
3654 undefined values.
3656 Consider this: we're in function f. f calls g. g moves rsp down
3657 modestly (say 16 bytes) and writes stuff all over the red zone, making it
3658 defined. g returns. f is buggy and reads from parts of the red zone
3659 that it didn't write on. But because g filled that area in, f is going
3660 to be picking up defined V bits and so any errors from reading bits of
3661 the red zone it didn't write, will be missed. The only solution I could
3662 think of was to make the red zone undefined when g returns to f.
3664 This is in accordance with the ABI, which makes it clear the redzone
3665 is volatile across function calls.
3667 The problem occurs the other way round too: f could fill the RZ up
3668 with defined values and g could mistakenly read them. So the RZ
3669 also needs to be nuked on function calls. */
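/* A hypothetical illustration of the scenario described above; the
   function names and the 64-byte offset are made up for the example.
   The point is only to show why the red zone must be re-marked
   undefined on return: without that, the V bits left behind by g's
   writes would hide f's out-of-frame read. */
#if 0
static long g ( void )
{
   volatile long spill[4];          /* g writes below f's SP, defining it */
   spill[0] = spill[1] = spill[2] = spill[3] = 1;
   return spill[0];
}
static long f ( void )
{
   volatile long probe;
   g();
   /* Buggy read from the red zone that f never wrote.  Memcheck only
      flags this if the zone was made undefined again when g returned. */
   return *(volatile long*)((char*)&probe - 64);
}
#endif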
3673 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3674 improved so as to have a lower miss rate. */
3676 static UWord stats__nia_cache_queries = 0;
3677 static UWord stats__nia_cache_misses = 0;
3679 typedef
3680 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
3681 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3682 WCacheEnt;
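/* A minimal sketch (hypothetical helper, mirroring the logic of
   convert_nia_to_ecu below) of how one cache slot behaves: entry 0 is
   the most recently used pair, and a hit on entry 1 promotes it to
   entry 0 so repeat lookups stay on the one-compare fast path. */
#if 0
static Bool lookup_slot ( WCacheEnt* ent, UWord nia, /*OUT*/UWord* ecu )
{
   if (ent->nia0 == nia) { *ecu = ent->ecu0; return True; }
   if (ent->nia1 == nia) {
      UWord tn = ent->nia0, te = ent->ecu0;        /* promote entry 1 */
      ent->nia0 = ent->nia1; ent->ecu0 = ent->ecu1;
      ent->nia1 = tn;        ent->ecu1 = te;
      *ecu = ent->ecu0;
      return True;
   }
   return False;                 /* caller builds a new ExeContext instead */
}
#endif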
3684 #define N_NIA_TO_ECU_CACHE 511
3686 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3688 static void init_nia_to_ecu_cache ( void )
3690 UWord i;
3691 Addr zero_addr = 0;
3692 ExeContext* zero_ec;
3693 UInt zero_ecu;
3694 /* Fill all the slots with an entry for address zero, and the
3695 relevant otags accordingly. Hence the cache is initially filled
3696 with valid data. */
3697 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3698 tl_assert(zero_ec);
3699 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3700 tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3701 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3702 nia_to_ecu_cache[i].nia0 = zero_addr;
3703 nia_to_ecu_cache[i].ecu0 = zero_ecu;
3704 nia_to_ecu_cache[i].nia1 = zero_addr;
3705 nia_to_ecu_cache[i].ecu1 = zero_ecu;
3709 static inline UInt convert_nia_to_ecu ( Addr nia )
3711 UWord i;
3712 UInt ecu;
3713 ExeContext* ec;
3715 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3717 stats__nia_cache_queries++;
3718 i = nia % N_NIA_TO_ECU_CACHE;
3719 tl_assert(i < N_NIA_TO_ECU_CACHE);
3721 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3722 return nia_to_ecu_cache[i].ecu0;
3724 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3725 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3726 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3727 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3728 # undef SWAP
3729 return nia_to_ecu_cache[i].ecu0;
3732 stats__nia_cache_misses++;
3733 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3734 tl_assert(ec);
3735 ecu = VG_(get_ECU_from_ExeContext)(ec);
3736 tl_assert(VG_(is_plausible_ECU)(ecu));
3738 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3739 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3741 nia_to_ecu_cache[i].nia0 = nia;
3742 nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3743 return ecu;
3747 /* This marks the stack as addressable but undefined, after a call or
3748 return for a target that has an ABI defined stack redzone. It
3749 happens quite a lot and needs to be fast. This is the version for
3750 origin tracking. The non-origin-tracking version is below. */
3751 VG_REGPARM(3)
3752 void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
3754 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
3755 if (0)
3756 VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
3757 base, len, nia );
3759 UInt ecu = convert_nia_to_ecu ( nia );
3760 tl_assert(VG_(is_plausible_ECU)(ecu));
3762 UInt otag = ecu | MC_OKIND_STACK;
3764 # if 0
3765 /* Slow(ish) version, which is fairly easily seen to be correct. */
3767 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3768 make_aligned_word64_undefined_w_otag(base + 0, otag);
3769 make_aligned_word64_undefined_w_otag(base + 8, otag);
3770 make_aligned_word64_undefined_w_otag(base + 16, otag);
3771 make_aligned_word64_undefined_w_otag(base + 24, otag);
3773 make_aligned_word64_undefined_w_otag(base + 32, otag);
3774 make_aligned_word64_undefined_w_otag(base + 40, otag);
3775 make_aligned_word64_undefined_w_otag(base + 48, otag);
3776 make_aligned_word64_undefined_w_otag(base + 56, otag);
3778 make_aligned_word64_undefined_w_otag(base + 64, otag);
3779 make_aligned_word64_undefined_w_otag(base + 72, otag);
3780 make_aligned_word64_undefined_w_otag(base + 80, otag);
3781 make_aligned_word64_undefined_w_otag(base + 88, otag);
3783 make_aligned_word64_undefined_w_otag(base + 96, otag);
3784 make_aligned_word64_undefined_w_otag(base + 104, otag);
3785 make_aligned_word64_undefined_w_otag(base + 112, otag);
3786 make_aligned_word64_undefined_w_otag(base + 120, otag);
3787 } else {
3788 MC_(make_mem_undefined_w_otag)(base, len, otag);
3790 # endif
3792 /* Idea is: go fast when
3793 * 8-aligned and length is 128
3794 * the sm is available in the main primary map
3795 * the address range falls entirely within a single secondary map
3796 If all those conditions hold, just update the V+A bits by writing
3797 directly into the vabits array. (If the sm was distinguished, this
3798 will make a copy and then write to it.) */
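/* Arithmetic behind the fast path, for reference: Memcheck keeps 2 V+A
   bits per byte, so one UShort of vabits covers 16/2 = 8 bytes.  A
   128-byte redzone therefore needs exactly 128/8 = 16 UShort stores
   (p[0]..p[15]), and the 288-byte ppc64 case further down needs
   288/8 = 36 stores (p[0]..p[35]). */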
3800 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3801 /* Now we know the address range is suitably sized and aligned. */
3802 UWord a_lo = (UWord)(base);
3803 UWord a_hi = (UWord)(base + 128 - 1);
3804 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3805 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3806 /* Now we know the entire range is within the main primary map. */
3807 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3808 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3809 if (LIKELY(pm_off_lo == pm_off_hi)) {
3810 /* Now we know that the entire address range falls within a
3811 single secondary map, and that that secondary 'lives' in
3812 the main primary map. */
3813 SecMap* sm = get_secmap_for_writing_low(a_lo);
3814 UWord v_off16 = SM_OFF_16(a_lo);
3815 UShort* p = &sm->vabits16[v_off16];
3816 p[ 0] = VA_BITS16_UNDEFINED;
3817 p[ 1] = VA_BITS16_UNDEFINED;
3818 p[ 2] = VA_BITS16_UNDEFINED;
3819 p[ 3] = VA_BITS16_UNDEFINED;
3820 p[ 4] = VA_BITS16_UNDEFINED;
3821 p[ 5] = VA_BITS16_UNDEFINED;
3822 p[ 6] = VA_BITS16_UNDEFINED;
3823 p[ 7] = VA_BITS16_UNDEFINED;
3824 p[ 8] = VA_BITS16_UNDEFINED;
3825 p[ 9] = VA_BITS16_UNDEFINED;
3826 p[10] = VA_BITS16_UNDEFINED;
3827 p[11] = VA_BITS16_UNDEFINED;
3828 p[12] = VA_BITS16_UNDEFINED;
3829 p[13] = VA_BITS16_UNDEFINED;
3830 p[14] = VA_BITS16_UNDEFINED;
3831 p[15] = VA_BITS16_UNDEFINED;
3832 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3833 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3834 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3835 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3836 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3837 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3838 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3839 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3840 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3841 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3842 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3843 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3844 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3845 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3846 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3847 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3848 return;
3853 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3854 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3855 /* Now we know the address range is suitably sized and aligned. */
3856 UWord a_lo = (UWord)(base);
3857 UWord a_hi = (UWord)(base + 288 - 1);
3858 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3859 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3860 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3861 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3862 if (LIKELY(pm_off_lo == pm_off_hi)) {
3863 /* Now we know that the entire address range falls within a
3864 single secondary map, and that that secondary 'lives' in
3865 the main primary map. */
3866 SecMap* sm = get_secmap_for_writing_low(a_lo);
3867 UWord v_off16 = SM_OFF_16(a_lo);
3868 UShort* p = &sm->vabits16[v_off16];
3869 p[ 0] = VA_BITS16_UNDEFINED;
3870 p[ 1] = VA_BITS16_UNDEFINED;
3871 p[ 2] = VA_BITS16_UNDEFINED;
3872 p[ 3] = VA_BITS16_UNDEFINED;
3873 p[ 4] = VA_BITS16_UNDEFINED;
3874 p[ 5] = VA_BITS16_UNDEFINED;
3875 p[ 6] = VA_BITS16_UNDEFINED;
3876 p[ 7] = VA_BITS16_UNDEFINED;
3877 p[ 8] = VA_BITS16_UNDEFINED;
3878 p[ 9] = VA_BITS16_UNDEFINED;
3879 p[10] = VA_BITS16_UNDEFINED;
3880 p[11] = VA_BITS16_UNDEFINED;
3881 p[12] = VA_BITS16_UNDEFINED;
3882 p[13] = VA_BITS16_UNDEFINED;
3883 p[14] = VA_BITS16_UNDEFINED;
3884 p[15] = VA_BITS16_UNDEFINED;
3885 p[16] = VA_BITS16_UNDEFINED;
3886 p[17] = VA_BITS16_UNDEFINED;
3887 p[18] = VA_BITS16_UNDEFINED;
3888 p[19] = VA_BITS16_UNDEFINED;
3889 p[20] = VA_BITS16_UNDEFINED;
3890 p[21] = VA_BITS16_UNDEFINED;
3891 p[22] = VA_BITS16_UNDEFINED;
3892 p[23] = VA_BITS16_UNDEFINED;
3893 p[24] = VA_BITS16_UNDEFINED;
3894 p[25] = VA_BITS16_UNDEFINED;
3895 p[26] = VA_BITS16_UNDEFINED;
3896 p[27] = VA_BITS16_UNDEFINED;
3897 p[28] = VA_BITS16_UNDEFINED;
3898 p[29] = VA_BITS16_UNDEFINED;
3899 p[30] = VA_BITS16_UNDEFINED;
3900 p[31] = VA_BITS16_UNDEFINED;
3901 p[32] = VA_BITS16_UNDEFINED;
3902 p[33] = VA_BITS16_UNDEFINED;
3903 p[34] = VA_BITS16_UNDEFINED;
3904 p[35] = VA_BITS16_UNDEFINED;
3905 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3906 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3907 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3908 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3909 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3910 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3911 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3912 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3913 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3914 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3915 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3916 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3917 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3918 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3919 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3920 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3921 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3922 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3923 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3924 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3925 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3926 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3927 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3928 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3929 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3930 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3931 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3932 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3933 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3934 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3935 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3936 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3937 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3938 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3939 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3940 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3941 return;
3946 /* else fall into slow case */
3947 MC_(make_mem_undefined_w_otag)(base, len, otag);
3951 /* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
3952 specialised for the non-origin-tracking case. */
3953 VG_REGPARM(2)
3954 void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
3956 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
3957 if (0)
3958 VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
3959 base, len );
3961 # if 0
3962 /* Slow(ish) version, which is fairly easily seen to be correct. */
3964 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3965 make_aligned_word64_undefined(base + 0);
3966 make_aligned_word64_undefined(base + 8);
3967 make_aligned_word64_undefined(base + 16);
3968 make_aligned_word64_undefined(base + 24);
3970 make_aligned_word64_undefined(base + 32);
3971 make_aligned_word64_undefined(base + 40);
3972 make_aligned_word64_undefined(base + 48);
3973 make_aligned_word64_undefined(base + 56);
3975 make_aligned_word64_undefined(base + 64);
3976 make_aligned_word64_undefined(base + 72);
3977 make_aligned_word64_undefined(base + 80);
3978 make_aligned_word64_undefined(base + 88);
3980 make_aligned_word64_undefined(base + 96);
3981 make_aligned_word64_undefined(base + 104);
3982 make_aligned_word64_undefined(base + 112);
3983 make_aligned_word64_undefined(base + 120);
3984 } else {
3985 make_mem_undefined(base, len);
3987 # endif
3989 /* Idea is: go fast when
3990 * 8-aligned and length is 128
3991 * the sm is available in the main primary map
3992 * the address range falls entirely within a single secondary map
3993 If all those conditions hold, just update the V+A bits by writing
3994 directly into the vabits array. (If the sm was distinguished, this
3995 will make a copy and then write to it.) */
3997 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3998 /* Now we know the address range is suitably sized and aligned. */
3999 UWord a_lo = (UWord)(base);
4000 UWord a_hi = (UWord)(base + 128 - 1);
4001 tl_assert(a_lo < a_hi); // paranoia: detect overflow
4002 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
4003 /* Now we know the entire range is within the main primary map. */
4004 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4005 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4006 if (LIKELY(pm_off_lo == pm_off_hi)) {
4007 /* Now we know that the entire address range falls within a
4008 single secondary map, and that that secondary 'lives' in
4009 the main primary map. */
4010 SecMap* sm = get_secmap_for_writing_low(a_lo);
4011 UWord v_off16 = SM_OFF_16(a_lo);
4012 UShort* p = &sm->vabits16[v_off16];
4013 p[ 0] = VA_BITS16_UNDEFINED;
4014 p[ 1] = VA_BITS16_UNDEFINED;
4015 p[ 2] = VA_BITS16_UNDEFINED;
4016 p[ 3] = VA_BITS16_UNDEFINED;
4017 p[ 4] = VA_BITS16_UNDEFINED;
4018 p[ 5] = VA_BITS16_UNDEFINED;
4019 p[ 6] = VA_BITS16_UNDEFINED;
4020 p[ 7] = VA_BITS16_UNDEFINED;
4021 p[ 8] = VA_BITS16_UNDEFINED;
4022 p[ 9] = VA_BITS16_UNDEFINED;
4023 p[10] = VA_BITS16_UNDEFINED;
4024 p[11] = VA_BITS16_UNDEFINED;
4025 p[12] = VA_BITS16_UNDEFINED;
4026 p[13] = VA_BITS16_UNDEFINED;
4027 p[14] = VA_BITS16_UNDEFINED;
4028 p[15] = VA_BITS16_UNDEFINED;
4029 return;
4034 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
4035 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
4036 /* Now we know the address range is suitably sized and aligned. */
4037 UWord a_lo = (UWord)(base);
4038 UWord a_hi = (UWord)(base + 288 - 1);
4039 tl_assert(a_lo < a_hi); // paranoia: detect overflow
4040 if (a_hi <= MAX_PRIMARY_ADDRESS) {
4041 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4042 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4043 if (LIKELY(pm_off_lo == pm_off_hi)) {
4044 /* Now we know that the entire address range falls within a
4045 single secondary map, and that that secondary 'lives' in
4046 the main primary map. */
4047 SecMap* sm = get_secmap_for_writing_low(a_lo);
4048 UWord v_off16 = SM_OFF_16(a_lo);
4049 UShort* p = &sm->vabits16[v_off16];
4050 p[ 0] = VA_BITS16_UNDEFINED;
4051 p[ 1] = VA_BITS16_UNDEFINED;
4052 p[ 2] = VA_BITS16_UNDEFINED;
4053 p[ 3] = VA_BITS16_UNDEFINED;
4054 p[ 4] = VA_BITS16_UNDEFINED;
4055 p[ 5] = VA_BITS16_UNDEFINED;
4056 p[ 6] = VA_BITS16_UNDEFINED;
4057 p[ 7] = VA_BITS16_UNDEFINED;
4058 p[ 8] = VA_BITS16_UNDEFINED;
4059 p[ 9] = VA_BITS16_UNDEFINED;
4060 p[10] = VA_BITS16_UNDEFINED;
4061 p[11] = VA_BITS16_UNDEFINED;
4062 p[12] = VA_BITS16_UNDEFINED;
4063 p[13] = VA_BITS16_UNDEFINED;
4064 p[14] = VA_BITS16_UNDEFINED;
4065 p[15] = VA_BITS16_UNDEFINED;
4066 p[16] = VA_BITS16_UNDEFINED;
4067 p[17] = VA_BITS16_UNDEFINED;
4068 p[18] = VA_BITS16_UNDEFINED;
4069 p[19] = VA_BITS16_UNDEFINED;
4070 p[20] = VA_BITS16_UNDEFINED;
4071 p[21] = VA_BITS16_UNDEFINED;
4072 p[22] = VA_BITS16_UNDEFINED;
4073 p[23] = VA_BITS16_UNDEFINED;
4074 p[24] = VA_BITS16_UNDEFINED;
4075 p[25] = VA_BITS16_UNDEFINED;
4076 p[26] = VA_BITS16_UNDEFINED;
4077 p[27] = VA_BITS16_UNDEFINED;
4078 p[28] = VA_BITS16_UNDEFINED;
4079 p[29] = VA_BITS16_UNDEFINED;
4080 p[30] = VA_BITS16_UNDEFINED;
4081 p[31] = VA_BITS16_UNDEFINED;
4082 p[32] = VA_BITS16_UNDEFINED;
4083 p[33] = VA_BITS16_UNDEFINED;
4084 p[34] = VA_BITS16_UNDEFINED;
4085 p[35] = VA_BITS16_UNDEFINED;
4086 return;
4091 /* else fall into slow case */
4092 make_mem_undefined(base, len);
4096 /* And this is an even more specialised case, for the case where there
4097 is no origin tracking, and the length is 128. */
4098 VG_REGPARM(1)
4099 void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
4101 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
4102 if (0)
4103 VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );
4105 # if 0
4106 /* Slow(ish) version, which is fairly easily seen to be correct. */
4108 if (LIKELY( VG_IS_8_ALIGNED(base) )) {
4109 make_aligned_word64_undefined(base + 0);
4110 make_aligned_word64_undefined(base + 8);
4111 make_aligned_word64_undefined(base + 16);
4112 make_aligned_word64_undefined(base + 24);
4114 make_aligned_word64_undefined(base + 32);
4115 make_aligned_word64_undefined(base + 40);
4116 make_aligned_word64_undefined(base + 48);
4117 make_aligned_word64_undefined(base + 56);
4119 make_aligned_word64_undefined(base + 64);
4120 make_aligned_word64_undefined(base + 72);
4121 make_aligned_word64_undefined(base + 80);
4122 make_aligned_word64_undefined(base + 88);
4124 make_aligned_word64_undefined(base + 96);
4125 make_aligned_word64_undefined(base + 104);
4126 make_aligned_word64_undefined(base + 112);
4127 make_aligned_word64_undefined(base + 120);
4128 } else {
4129 make_mem_undefined(base, 128);
4131 # endif
4133 /* Idea is: go fast when
4134 * 16-aligned and length is 128
4135 * the sm is available in the main primary map
4136 * the address range falls entirely within a single secondary map
4137 If all those conditions hold, just update the V+A bits by writing
4138 directly into the vabits array. (If the sm was distinguished, this
4139 will make a copy and then write to it.)
4141 Typically this applies to amd64 'ret' instructions, since RSP is
4142 16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI). */
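/* Worked numbers for this variant, for reference: at 2 V+A bits per
   byte, a 32-bit word of vabits covers 32/2 = 16 bytes, so the
   128-byte block is exactly 128/16 = 8 UInt stores (w32[0]..w32[7]).
   The 8 % 16 case below cannot use eight aligned UInt stores, hence
   its mix of UShort and UInt writes. */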
4144 if (LIKELY( VG_IS_16_ALIGNED(base) )) {
4145 /* Now we know the address range is suitably sized and aligned. */
4146 UWord a_lo = (UWord)(base);
4147 UWord a_hi = (UWord)(base + 128 - 1);
4148 /* FIXME: come up with a sane story on the wraparound case
4149 (which of course cannot happen, but still..) */
4150 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
4151 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
4152 /* Now we know the entire range is within the main primary map. */
4153 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4154 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4155 if (LIKELY(pm_off_lo == pm_off_hi)) {
4156 /* Now we know that the entire address range falls within a
4157 single secondary map, and that that secondary 'lives' in
4158 the main primary map. */
4159 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
4160 SecMap* sm = get_secmap_for_writing_low(a_lo);
4161 UWord v_off = SM_OFF(a_lo);
4162 UInt* w32 = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]);
4163 w32[ 0] = VA_BITS32_UNDEFINED;
4164 w32[ 1] = VA_BITS32_UNDEFINED;
4165 w32[ 2] = VA_BITS32_UNDEFINED;
4166 w32[ 3] = VA_BITS32_UNDEFINED;
4167 w32[ 4] = VA_BITS32_UNDEFINED;
4168 w32[ 5] = VA_BITS32_UNDEFINED;
4169 w32[ 6] = VA_BITS32_UNDEFINED;
4170 w32[ 7] = VA_BITS32_UNDEFINED;
4171 return;
4176 /* The same, but for when base is 8 % 16, which is the situation
4177 with RSP for amd64-ELF immediately after call instructions. */
4179 if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
4180 /* Now we know the address range is suitably sized and aligned. */
4181 UWord a_lo = (UWord)(base);
4182 UWord a_hi = (UWord)(base + 128 - 1);
4183 /* FIXME: come up with a sane story on the wraparound case
4184 (which of course cannot happen, but still..) */
4185 /* tl_assert(a_lo < a_hi); */ // paranoia: detect overflow
4186 if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
4187 /* Now we know the entire range is within the main primary map. */
4188 UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4189 UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4190 if (LIKELY(pm_off_lo == pm_off_hi)) {
4191 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
4192 /* Now we know that the entire address range falls within a
4193 single secondary map, and that that secondary 'lives' in
4194 the main primary map. */
4195 SecMap* sm = get_secmap_for_writing_low(a_lo);
4196 UWord v_off16 = SM_OFF_16(a_lo);
4197 UShort* w16 = &sm->vabits16[v_off16];
4198 UInt* w32 = ASSUME_ALIGNED(UInt*, &w16[1]);
4199 /* The following assertion is commented out for obvious
4200 performance reasons, but was verified as valid when
4201 running the entire testsuite and also Firefox. */
4202 /* tl_assert(VG_IS_4_ALIGNED(w32)); */
4203 w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
4204 w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
4205 w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
4206 w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
4207 w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
4208 w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
4209 w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
4210 w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
4211 w16[15] = VA_BITS16_UNDEFINED; // w16[15]
4212 return;
4217 /* else fall into slow case */
4218 PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
4219 make_mem_undefined(base, 128);
4223 /*------------------------------------------------------------*/
4224 /*--- Checking memory ---*/
4225 /*------------------------------------------------------------*/
4227 typedef
4228 enum {
4229 MC_Ok = 5,
4230 MC_AddrErr = 6,
4231 MC_ValueErr = 7
4233 MC_ReadResult;
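/* For reference (see check_mem_is_defined below): MC_AddrErr feeds an
   addressability error report, MC_ValueErr a definedness (value) error,
   and MC_Ok means no report is made. */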
4236 /* Check permissions for address range. If inadequate permissions
4237 exist, *bad_addr is set to the offending address, so the caller can
4238 know what it is. */
4240 /* Returns True if [a .. a+len) is not addressable. Otherwise,
4241 returns False, and if bad_addr is non-NULL, sets *bad_addr to
4242 indicate the lowest failing address. Functions below are
4243 similar. */
4244 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
4246 SizeT i;
4247 UWord vabits2;
4249 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
4250 for (i = 0; i < len; i++) {
4251 PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
4252 vabits2 = get_vabits2(a);
4253 if (VA_BITS2_NOACCESS != vabits2) {
4254 if (bad_addr != NULL) *bad_addr = a;
4255 return False;
4257 a++;
4259 return True;
4262 static Bool is_mem_addressable ( Addr a, SizeT len,
4263 /*OUT*/Addr* bad_addr )
4265 SizeT i;
4266 UWord vabits2;
4268 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
4269 for (i = 0; i < len; i++) {
4270 PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
4271 vabits2 = get_vabits2(a);
4272 if (VA_BITS2_NOACCESS == vabits2) {
4273 if (bad_addr != NULL) *bad_addr = a;
4274 return False;
4276 a++;
4278 return True;
4281 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
4282 /*OUT*/Addr* bad_addr,
4283 /*OUT*/UInt* otag )
4285 SizeT i;
4286 UWord vabits2;
4288 PROF_EVENT(MCPE_IS_MEM_DEFINED);
4289 DEBUG("is_mem_defined\n");
4291 if (otag) *otag = 0;
4292 if (bad_addr) *bad_addr = 0;
4293 for (i = 0; i < len; i++) {
4294 PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
4295 vabits2 = get_vabits2(a);
4296 if (VA_BITS2_DEFINED != vabits2) {
4297 // Error! Nb: Report addressability errors in preference to
4298 // definedness errors. And don't report definedness errors unless
4299 // --undef-value-errors=yes.
4300 if (bad_addr) {
4301 *bad_addr = a;
4303 if (VA_BITS2_NOACCESS == vabits2) {
4304 return MC_AddrErr;
4306 if (MC_(clo_mc_level) >= 2) {
4307 if (otag && MC_(clo_mc_level) == 3) {
4308 *otag = MC_(helperc_b_load1)( a );
4310 return MC_ValueErr;
4313 a++;
4315 return MC_Ok;
4319 /* Like is_mem_defined but doesn't give up at the first uninitialised
4320 byte -- the entire range is always checked. This is important for
4321 detecting errors in the case where a checked range strays into
4322 invalid memory, but that fact is not detected by the ordinary
4323 is_mem_defined(), because of an undefined section that precedes the
4324 out of range section, possibly as a result of an alignment hole in
4325 the checked data. This version always checks the entire range and
4326 can report both a definedness and an accessibility error, if
4327 necessary. */
4328 static void is_mem_defined_comprehensive (
4329 Addr a, SizeT len,
4330 /*OUT*/Bool* errorV, /* is there a definedness err? */
4331 /*OUT*/Addr* bad_addrV, /* if so where? */
4332 /*OUT*/UInt* otagV, /* and what's its otag? */
4333 /*OUT*/Bool* errorA, /* is there an addressability err? */
4334 /*OUT*/Addr* bad_addrA /* if so where? */
4337 SizeT i;
4338 UWord vabits2;
4339 Bool already_saw_errV = False;
4341 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
4342 DEBUG("is_mem_defined_comprehensive\n");
4344 tl_assert(!(*errorV || *errorA));
4346 for (i = 0; i < len; i++) {
4347 PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
4348 vabits2 = get_vabits2(a);
4349 switch (vabits2) {
4350 case VA_BITS2_DEFINED:
4351 a++;
4352 break;
4353 case VA_BITS2_UNDEFINED:
4354 case VA_BITS2_PARTDEFINED:
4355 if (!already_saw_errV) {
4356 *errorV = True;
4357 *bad_addrV = a;
4358 if (MC_(clo_mc_level) == 3) {
4359 *otagV = MC_(helperc_b_load1)( a );
4360 } else {
4361 *otagV = 0;
4363 already_saw_errV = True;
4365 a++; /* keep going */
4366 break;
4367 case VA_BITS2_NOACCESS:
4368 *errorA = True;
4369 *bad_addrA = a;
4370 return; /* give up now. */
4371 default:
4372 tl_assert(0);
4378 /* Check a zero-terminated ascii string. Tricky -- don't want to
4379 examine the actual bytes, to find the end, until we're sure it is
4380 safe to do so. */
4382 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
4384 UWord vabits2;
4386 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
4387 DEBUG("mc_is_defined_asciiz\n");
4389 if (otag) *otag = 0;
4390 if (bad_addr) *bad_addr = 0;
4391 while (True) {
4392 PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
4393 vabits2 = get_vabits2(a);
4394 if (VA_BITS2_DEFINED != vabits2) {
4395 // Error! Nb: Report addressability errors in preference to
4396 // definedness errors. And don't report definedness errors unless
4397 // --undef-value-errors=yes.
4398 if (bad_addr) {
4399 *bad_addr = a;
4401 if (VA_BITS2_NOACCESS == vabits2) {
4402 return MC_AddrErr;
4404 if (MC_(clo_mc_level) >= 2) {
4405 if (otag && MC_(clo_mc_level) == 3) {
4406 *otag = MC_(helperc_b_load1)( a );
4408 return MC_ValueErr;
4411 /* Ok, a is safe to read. */
4412 if (* ((UChar*)a) == 0) {
4413 return MC_Ok;
4415 a++;
4420 /*------------------------------------------------------------*/
4421 /*--- Memory event handlers ---*/
4422 /*------------------------------------------------------------*/
4424 static
4425 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
4426 Addr base, SizeT size )
4428 Addr bad_addr;
4429 Bool ok = is_mem_addressable ( base, size, &bad_addr );
4431 if (!ok) {
4432 switch (part) {
4433 case Vg_CoreSysCall:
4434 MC_(record_memparam_error) ( tid, bad_addr,
4435 /*isAddrErr*/True, s, 0/*otag*/ );
4436 break;
4438 case Vg_CoreSignal:
4439 MC_(record_core_mem_error)( tid, s );
4440 break;
4442 default:
4443 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
4448 static
4449 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
4450 Addr base, SizeT size )
4452 UInt otag = 0;
4453 Addr bad_addr;
4454 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
4456 if (MC_Ok != res) {
4457 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4459 switch (part) {
4460 case Vg_CoreSysCall:
4461 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4462 isAddrErr ? 0 : otag );
4463 break;
4465 case Vg_CoreSysCallArgInMem:
4466 MC_(record_regparam_error) ( tid, s, otag );
4467 break;
4469 /* If we're being asked to jump to a silly address, record an error
4470 message before potentially crashing the entire system. */
4471 case Vg_CoreTranslate:
4472 MC_(record_jump_error)( tid, bad_addr );
4473 break;
4475 default:
4476 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
4481 static
4482 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
4483 const HChar* s, Addr str )
4485 MC_ReadResult res;
4486 Addr bad_addr = 0; // shut GCC up
4487 UInt otag = 0;
4489 tl_assert(part == Vg_CoreSysCall);
4490 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
4491 if (MC_Ok != res) {
4492 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4493 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4494 isAddrErr ? 0 : otag );
4498 /* Handling of mmap and mprotect is not as simple as it seems.
4500 The underlying semantics are that memory obtained from mmap is
4501 always initialised, but may be inaccessible. And changes to the
4502 protection of memory do not change its contents and hence not its
4503 definedness state. Problem is we can't model
4504 inaccessible-but-with-some-definedness state; once we mark memory
4505 as inaccessible we lose all info about definedness, and so can't
4506 restore that if it is later made accessible again.
4508 One obvious thing to do is this:
4510 mmap/mprotect NONE -> noaccess
4511 mmap/mprotect other -> defined
4513 The problem case here is: taking accessible memory, writing
4514 uninitialised data to it, mprotecting it NONE and later mprotecting
4515 it back to some accessible state causes the undefinedness to be
4516 lost.
4518 A better proposal is:
4520 (1) mmap NONE -> make noaccess
4521 (2) mmap other -> make defined
4523 (3) mprotect NONE -> # no change
4524 (4) mprotect other -> change any "noaccess" to "defined"
4526 (2) is OK because memory newly obtained from mmap really is defined
4527 (zeroed out by the kernel -- doing anything else would
4528 constitute a massive security hole.)
4530 (1) is OK because the only way to make the memory usable is via
4531 (4), in which case we also wind up correctly marking it all as
4532 defined.
4534 (3) is the weak case. We choose not to change the memory state
4535 (presumably the range is in some mixture of "defined" and
4536 "undefined", viz, accessible but with arbitrary V bits). Doing
4537 nothing means we retain the V bits, so that if the memory is
4538 later mprotected "other", the V bits remain unchanged, so there
4539 can be no false negatives. The bad effect is that if there's
4540 an access in the area, then MC cannot warn; but at least we'll
4541 get a SEGV to show, so it's better than nothing.
4543 Consider the sequence (3) followed by (4). Any memory that was
4544 "defined" or "undefined" previously retains its state (as
4545 required). Any memory that was "noaccess" before can only have
4546 been made that way by (1), and so it's OK to change it to
4547 "defined".
4549 See https://bugs.kde.org/show_bug.cgi?id=205541
4550 and https://bugs.kde.org/show_bug.cgi?id=210268
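/* Illustrative walk-through of the rules above (a sketch only: the
   client-side calls and the address p are hypothetical; what matters
   is the shadow-state transition applied by the handlers below):

      p = mmap(..., PROT_READ|PROT_WRITE, ...)   -> (2): all "defined"
      write uninitialised data to p[..]          -> V bits track the store
      mprotect(p, len, PROT_NONE)                -> (3): shadow unchanged
      mprotect(p, len, PROT_READ)                -> (4): only bytes still
                                                   "noaccess" become "defined"

   so the undefinedness introduced by the second step survives the
   PROT_NONE round trip, which is exactly the property argued for
   above. */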
4552 static
4553 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4554 ULong di_handle )
4556 if (rr || ww || xx) {
4557 /* (2) mmap/mprotect other -> defined */
4558 MC_(make_mem_defined)(a, len);
4559 } else {
4560 /* (1) mmap/mprotect NONE -> noaccess */
4561 MC_(make_mem_noaccess)(a, len);
4565 static
4566 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4568 if (rr || ww || xx) {
4569 /* (4) mprotect other -> change any "noaccess" to "defined" */
4570 make_mem_defined_if_noaccess(a, len);
4571 } else {
4572 /* (3) mprotect NONE -> # no change */
4573 /* do nothing */
4578 static
4579 void mc_new_mem_startup( Addr a, SizeT len,
4580 Bool rr, Bool ww, Bool xx, ULong di_handle )
4582 // Because code is defined, initialised variables get put in the data
4583 // segment and are defined, and uninitialised variables get put in the
4584 // bss segment and are auto-zeroed (and so defined).
4586 // It's possible that there will be padding between global variables.
4587 // This will also be auto-zeroed, and marked as defined by Memcheck. If
4588 // a program uses it, Memcheck will not complain. This is arguably a
4589 // false negative, but it's a grey area -- the behaviour is defined (the
4590 // padding is zeroed) but it's probably not what the user intended. And
4591 // we can't avoid it.
4593 // Note: we generally ignore RWX permissions, because we can't track them
4594 // without requiring more than one A bit which would slow things down a
4595 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
4596 // So we mark any such pages as "unaddressable".
4597 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4598 a, (ULong)len, rr, ww, xx);
4599 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4602 static
4603 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4605 MC_(make_mem_defined)(a, len);
4609 /*------------------------------------------------------------*/
4610 /*--- Register event handlers ---*/
4611 /*------------------------------------------------------------*/
4613 /* Try and get a nonzero origin for the guest state section of thread
4614 tid characterised by (offset,size). Return 0 if nothing to show
4615 for it. */
4616 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4617 Int offset, SizeT size )
4619 Int sh2off;
4620 UInt area[3];
4621 UInt otag;
4622 sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4623 if (sh2off == -1)
4624 return 0; /* This piece of guest state is not tracked */
4625 tl_assert(sh2off >= 0);
4626 tl_assert(0 == (sh2off % 4));
4627 area[0] = 0x31313131;
4628 area[2] = 0x27272727;
4629 VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4630 tl_assert(area[0] == 0x31313131);
4631 tl_assert(area[2] == 0x27272727);
4632 otag = area[1];
4633 return otag;
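/* Note on the function above: area[0] and area[2] are filled with
   arbitrary canary values (0x31313131 / 0x27272727) so that the
   asserts can detect VG_(get_shadow_regs_area) writing outside the 4
   bytes it was asked for; only area[1], the otag itself, is used. */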
4637 /* When some chunk of guest state is written, mark the corresponding
4638 shadow area as valid. This is used to initialise arbitrarily large
4639 chunks of guest state, hence the _SIZE value, which has to be as
4640 big as the biggest guest state.
4642 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4643 PtrdiffT offset, SizeT size)
4645 # define MAX_REG_WRITE_SIZE 2264
4646 UChar area[MAX_REG_WRITE_SIZE];
4647 tl_assert(size <= MAX_REG_WRITE_SIZE);
4648 VG_(memset)(area, V_BITS8_DEFINED, size);
4649 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4650 # undef MAX_REG_WRITE_SIZE
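/* In other words, any core-initiated write to the guest state (for
   example, delivery of a syscall result) marks the affected registers
   as fully defined in shadow area 1; the origin shadow (area 2) is not
   touched here. */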
4653 static
4654 void mc_post_reg_write_clientcall ( ThreadId tid,
4655 PtrdiffT offset, SizeT size, Addr f)
4657 mc_post_reg_write(/*dummy*/0, tid, offset, size);
4660 /* Look at the definedness of the guest's shadow state for
4661 [offset, offset+len). If any part of that is undefined, record
4662 a parameter error.
4664 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4665 PtrdiffT offset, SizeT size)
4667 Int i;
4668 Bool bad;
4669 UInt otag;
4671 UChar area[16];
4672 tl_assert(size <= 16);
4674 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4676 bad = False;
4677 for (i = 0; i < size; i++) {
4678 if (area[i] != V_BITS8_DEFINED) {
4679 bad = True;
4680 break;
4684 if (!bad)
4685 return;
4687 /* We've found some undefinedness. See if we can also find an
4688 origin for it. */
4689 otag = mb_get_origin_for_guest_offset( tid, offset, size );
4690 MC_(record_regparam_error) ( tid, s, otag );
4694 /*------------------------------------------------------------*/
4695 /*--- Register-memory event handlers ---*/
4696 /*------------------------------------------------------------*/
4698 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
4699 PtrdiffT guest_state_offset, SizeT size )
4701 SizeT i;
4702 UChar vbits8;
4703 Int offset;
4704 UInt d32;
4706 /* Slow loop. */
4707 for (i = 0; i < size; i++) {
4708 get_vbits8( a+i, &vbits8 );
4709 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
4710 1, &vbits8 );
4713 if (MC_(clo_mc_level) != 3)
4714 return;
4716 /* Track origins. */
4717 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4718 if (offset == -1)
4719 return;
4721 switch (size) {
4722 case 1:
4723 d32 = MC_(helperc_b_load1)( a );
4724 break;
4725 case 2:
4726 d32 = MC_(helperc_b_load2)( a );
4727 break;
4728 case 4:
4729 d32 = MC_(helperc_b_load4)( a );
4730 break;
4731 case 8:
4732 d32 = MC_(helperc_b_load8)( a );
4733 break;
4734 case 16:
4735 d32 = MC_(helperc_b_load16)( a );
4736 break;
4737 case 32:
4738 d32 = MC_(helperc_b_load32)( a );
4739 break;
4740 default:
4741 tl_assert(0);
4744 VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
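/* Note: the origin shadow (area 2) keeps a single 32-bit otag per
   tracked guest-state slot, which is why one B-load of the transfer's
   full width is enough to summarise the origin of the whole copy
   above. */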
4747 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
4748 PtrdiffT guest_state_offset, Addr a,
4749 SizeT size )
4751 SizeT i;
4752 UChar vbits8;
4753 Int offset;
4754 UInt d32;
4756 /* Slow loop. */
4757 for (i = 0; i < size; i++) {
4758 VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
4759 guest_state_offset+i, 1 );
4760 set_vbits8( a+i, vbits8 );
4763 if (MC_(clo_mc_level) != 3)
4764 return;
4766 /* Track origins. */
4767 offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4768 if (offset == -1)
4769 return;
4771 VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
4772 switch (size) {
4773 case 1:
4774 MC_(helperc_b_store1)( a, d32 );
4775 break;
4776 case 2:
4777 MC_(helperc_b_store2)( a, d32 );
4778 break;
4779 case 4:
4780 MC_(helperc_b_store4)( a, d32 );
4781 break;
4782 case 8:
4783 MC_(helperc_b_store8)( a, d32 );
4784 break;
4785 case 16:
4786 MC_(helperc_b_store16)( a, d32 );
4787 break;
4788 case 32:
4789 MC_(helperc_b_store32)( a, d32 );
4790 break;
4791 default:
4792 tl_assert(0);
4797 /*------------------------------------------------------------*/
4798 /*--- Some static assertions ---*/
4799 /*------------------------------------------------------------*/
4801 /* The handwritten assembly helpers below have baked-in assumptions
4802 about various constant values. These assertions attempt to make
4803 that a bit safer by checking those values and flagging changes that
4804 would make the assembly invalid. Not perfect but it's better than
4805 nothing. */
4807 STATIC_ASSERT(SM_CHUNKS * 4 == 65536);
4809 STATIC_ASSERT(VA_BITS8_DEFINED == 0xAA);
4810 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);
4812 STATIC_ASSERT(V_BITS32_DEFINED == 0x00000000);
4813 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);
4815 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA);
4816 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);
4818 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000);
4819 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);
4821 STATIC_ASSERT(VA_BITS2_DEFINED == 2);
4822 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);
4824 STATIC_ASSERT(V_BITS8_DEFINED == 0x00);
4825 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
4828 /*------------------------------------------------------------*/
4829 /*--- Functions called directly from generated code: ---*/
4830 /*--- Load/store handlers. ---*/
4831 /*------------------------------------------------------------*/
4833 /* Types: LOADV32, LOADV16, LOADV8 are:
4834 UWord fn ( Addr a )
4835 so they return 32-bits on 32-bit machines and 64-bits on
4836 64-bit machines. Addr has the same size as a host word.
4838 LOADV64 is always ULong fn ( Addr a )
4840 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4841 are a UWord, and for STOREV64 they are a ULong.
4844 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4845 naturally '_sz/8'-aligned, or it exceeds the range covered by the
4846 primary map. This is all very tricky (and important!), so let's
4847 work through the maths by hand (below), *and* assert for these
4848 values at startup. */
4849 #define MASK(_szInBytes) \
4850 ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4852 /* MASK only exists so as to define this macro. */
4853 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
4854 ((_a) & MASK((_szInBits>>3)))
4856 /* On a 32-bit machine:
4858 N_PRIMARY_BITS == 16, so
4859 N_PRIMARY_MAP == 0x10000, so
4860 N_PRIMARY_MAP-1 == 0xFFFF, so
4861 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4863 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4864 = ~ ( 0xFFFF | 0xFFFF0000 )
4865 = ~ 0xFFFF'FFFF
4866 = 0
4868 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4869 = ~ ( 0xFFFE | 0xFFFF0000 )
4870 = ~ 0xFFFF'FFFE
4871 = 1
4873 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4874 = ~ ( 0xFFFC | 0xFFFF0000 )
4875 = ~ 0xFFFF'FFFC
4876 = 3
4878 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4879 = ~ ( 0xFFF8 | 0xFFFF0000 )
4880 = ~ 0xFFFF'FFF8
4881 = 7
4883 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4884 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for
4885 the 1-byte alignment case, it is always a zero value, since MASK(1)
4886 is zero. All as expected.
4888 On a 64-bit machine, it's more complex, since we're testing
4889 simultaneously for misalignment and for the address being at or
4890 above 64G:
4892 N_PRIMARY_BITS == 20, so
4893 N_PRIMARY_MAP == 0x100000, so
4894 N_PRIMARY_MAP-1 == 0xFFFFF, so
4895 (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4897 MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4898 = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4899 = ~ 0xF'FFFF'FFFF
4900 = 0xFFFF'FFF0'0000'0000
4902 MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4903 = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4904 = ~ 0xF'FFFF'FFFE
4905 = 0xFFFF'FFF0'0000'0001
4907 MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4908 = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4909 = ~ 0xF'FFFF'FFFC
4910 = 0xFFFF'FFF0'0000'0003
4912 MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4913 = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4914 = ~ 0xF'FFFF'FFF8
4915 = 0xFFFF'FFF0'0000'0007
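/* A quick sanity example for the 64-bit case (hypothetical addresses,
   assuming the layout above, i.e. N_PRIMARY_BITS == 20 so the primary
   map covers the first 64G):

      UNALIGNED_OR_HIGH(0x1'2345'6788, 64) == 0    8-aligned, below 64G:
                                                   take the fast path
      UNALIGNED_OR_HIGH(0x1'2345'678C, 64) != 0    misaligned: hits the
                                                   low 0x7 of MASK(8)
      UNALIGNED_OR_HIGH(0x23'4567'8888, 64) != 0   at or above 64G: hits
                                                   0xFFFF'FFF0'0000'0000
*/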
4918 /*------------------------------------------------------------*/
4919 /*--- LOADV256 and LOADV128 ---*/
4920 /*------------------------------------------------------------*/
4922 static INLINE
4923 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4924 Addr a, SizeT nBits, Bool isBigEndian )
4926 PROF_EVENT(MCPE_LOADV_128_OR_256);
4928 #ifndef PERF_FAST_LOADV
4929 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4930 return;
4931 #else
4933 UWord sm_off16, vabits16, j;
4934 UWord nBytes = nBits / 8;
4935 UWord nULongs = nBytes / 8;
4936 SecMap* sm;
4938 if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4939 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
4940 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4941 return;
4944 /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4945 suitably aligned, is mapped, and addressible. */
4946 for (j = 0; j < nULongs; j++) {
4947 sm = get_secmap_for_reading_low(a + 8*j);
4948 sm_off16 = SM_OFF_16(a + 8*j);
4949 vabits16 = sm->vabits16[sm_off16];
4951 // Convert V bits from compact memory form to expanded
4952 // register form.
4953 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4954 res[j] = V_BITS64_DEFINED;
4955 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4956 res[j] = V_BITS64_UNDEFINED;
4957 } else {
4958 /* Slow case: some block of 8 bytes are not all-defined or
4959 all-undefined. */
4960 PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
4961 mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4962 return;
4965 return;
4967 #endif
4970 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4972 mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4974 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4976 mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4979 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4981 mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4983 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4985 mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4988 /*------------------------------------------------------------*/
4989 /*--- LOADV64 ---*/
4990 /*------------------------------------------------------------*/
4992 static INLINE
4993 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4995 PROF_EVENT(MCPE_LOADV64);
4997 #ifndef PERF_FAST_LOADV
4998 return mc_LOADVn_slow( a, 64, isBigEndian );
4999 #else
5001 UWord sm_off16, vabits16;
5002 SecMap* sm;
5004 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
5005 PROF_EVENT(MCPE_LOADV64_SLOW1);
5006 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
5009 sm = get_secmap_for_reading_low(a);
5010 sm_off16 = SM_OFF_16(a);
5011 vabits16 = sm->vabits16[sm_off16];
5013 // Handle common case quickly: a is suitably aligned, is mapped, and
5014 // addressible.
5015 // Convert V bits from compact memory form to expanded register form.
5016 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
5017 return V_BITS64_DEFINED;
5018 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
5019 return V_BITS64_UNDEFINED;
5020 } else {
5021 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
5022 PROF_EVENT(MCPE_LOADV64_SLOW2);
5023 return mc_LOADVn_slow( a, 64, isBigEndian );
5026 #endif
5029 // Generic for all platforms
5030 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
5032 return mc_LOADV64(a, True);
5035 // Non-generic assembly for arm32-linux
5036 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5037 && defined(VGP_arm_linux)
5038 /* See mc_main_asm.c */
5040 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5041 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris) || defined(VGP_x86_freebsd))
5042 /* See mc_main_asm.c */
5044 #else
5045 // Generic for all platforms except {arm32,x86}-linux, x86-solaris and x86-freebsd
5046 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
5048 return mc_LOADV64(a, False);
5050 #endif
5052 /*------------------------------------------------------------*/
5053 /*--- STOREV64 ---*/
5054 /*------------------------------------------------------------*/
5056 static INLINE
5057 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
5059 PROF_EVENT(MCPE_STOREV64);
5061 #ifndef PERF_FAST_STOREV
5062 // XXX: this slow case seems to be marginally faster than the fast case!
5063 // Investigate further.
5064 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5065 #else
5067 UWord sm_off16, vabits16;
5068 SecMap* sm;
5070 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
5071 PROF_EVENT(MCPE_STOREV64_SLOW1);
5072 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5073 return;
5076 sm = get_secmap_for_reading_low(a);
5077 sm_off16 = SM_OFF_16(a);
5078 vabits16 = sm->vabits16[sm_off16];
5080 // To understand the below cleverness, see the extensive comments
5081 // in MC_(helperc_STOREV8).
5082 if (LIKELY(V_BITS64_DEFINED == vbits64)) {
5083 if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
5084 return;
5086 if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
5087 sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
5088 return;
5090 PROF_EVENT(MCPE_STOREV64_SLOW2);
5091 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5092 return;
5094 if (V_BITS64_UNDEFINED == vbits64) {
5095 if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
5096 return;
5098 if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
5099 sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
5100 return;
5102 PROF_EVENT(MCPE_STOREV64_SLOW3);
5103 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5104 return;
5107 PROF_EVENT(MCPE_STOREV64_SLOW4);
5108 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5110 #endif
5113 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
5115 mc_STOREV64(a, vbits64, True);
5117 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
5119 mc_STOREV64(a, vbits64, False);
5122 /*------------------------------------------------------------*/
5123 /*--- LOADV32 ---*/
5124 /*------------------------------------------------------------*/
5126 static INLINE
5127 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
5129 PROF_EVENT(MCPE_LOADV32);
5131 #ifndef PERF_FAST_LOADV
5132 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5133 #else
5135 UWord sm_off, vabits8;
5136 SecMap* sm;
5138 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5139 PROF_EVENT(MCPE_LOADV32_SLOW1);
5140 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5143 sm = get_secmap_for_reading_low(a);
5144 sm_off = SM_OFF(a);
5145 vabits8 = sm->vabits8[sm_off];
5147 // Handle common case quickly: a is suitably aligned, is mapped, and the
5148 // entire word32 it lives in is addressible.
5149 // Convert V bits from compact memory form to expanded register form.
5150 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
5151 // Almost certainly not necessary, but be paranoid.
5152 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5153 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
5154 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
5155 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
5156 } else {
5157 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
5158 PROF_EVENT(MCPE_LOADV32_SLOW2);
5159 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5162 #endif
5165 // Generic for all platforms
5166 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
5168 return mc_LOADV32(a, True);
5171 // Non-generic assembly for arm32-linux
5172 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5173 && defined(VGP_arm_linux)
5174 /* See mc_main_asm.c */
5176 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5177 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5178 /* See mc_main_asm.c */
5180 #else
5181 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5182 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
5184 return mc_LOADV32(a, False);
5186 #endif
5188 /*------------------------------------------------------------*/
5189 /*--- STOREV32 ---*/
5190 /*------------------------------------------------------------*/
5192 static INLINE
5193 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
5195 PROF_EVENT(MCPE_STOREV32);
5197 #ifndef PERF_FAST_STOREV
5198 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5199 #else
5201 UWord sm_off, vabits8;
5202 SecMap* sm;
5204 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5205 PROF_EVENT(MCPE_STOREV32_SLOW1);
5206 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5207 return;
5210 sm = get_secmap_for_reading_low(a);
5211 sm_off = SM_OFF(a);
5212 vabits8 = sm->vabits8[sm_off];
5214 // To understand the below cleverness, see the extensive comments
5215 // in MC_(helperc_STOREV8).
5216 if (LIKELY(V_BITS32_DEFINED == vbits32)) {
5217 if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
5218 return;
5220 if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
5221 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
5222 return;
5224 PROF_EVENT(MCPE_STOREV32_SLOW2);
5225 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5226 return;
5228 if (V_BITS32_UNDEFINED == vbits32) {
5229 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
5230 return;
5232 if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
5233 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
5234 return;
5236 PROF_EVENT(MCPE_STOREV32_SLOW3);
5237 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5238 return;
5241 PROF_EVENT(MCPE_STOREV32_SLOW4);
5242 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5244 #endif
5247 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
5249 mc_STOREV32(a, vbits32, True);
5251 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
5253 mc_STOREV32(a, vbits32, False);
5256 /*------------------------------------------------------------*/
5257 /*--- LOADV16 ---*/
5258 /*------------------------------------------------------------*/
5260 static INLINE
5261 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
5263 PROF_EVENT(MCPE_LOADV16);
5265 #ifndef PERF_FAST_LOADV
5266 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5267 #else
5269 UWord sm_off, vabits8;
5270 SecMap* sm;
5272 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5273 PROF_EVENT(MCPE_LOADV16_SLOW1);
5274 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5277 sm = get_secmap_for_reading_low(a);
5278 sm_off = SM_OFF(a);
5279 vabits8 = sm->vabits8[sm_off];
5280 // Handle common case quickly: a is suitably aligned, is mapped, and is
5281 // addressible.
5282 // Convert V bits from compact memory form to expanded register form
5283 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; }
5284 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
5285 else {
5286 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5287 // the two sub-bytes.
5288 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
5289 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
5290 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
5291 else {
5292 /* Slow case: the two bytes are not all-defined or all-undefined. */
5293 PROF_EVENT(MCPE_LOADV16_SLOW2);
5294 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5298 #endif
5301 // Generic for all platforms
5302 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
5304 return mc_LOADV16(a, True);
5307 // Non-generic assembly for arm32-linux
5308 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5309 && defined(VGP_arm_linux)
5310 __asm__( /* Derived from NCode template */
5311 ".text \n"
5312 ".align 2 \n"
5313 ".global vgMemCheck_helperc_LOADV16le \n"
5314 ".type vgMemCheck_helperc_LOADV16le, %function \n"
5315 "vgMemCheck_helperc_LOADV16le: \n" //
5316 " tst r0, #1 \n" //
5317 " bne .LLV16LEc12 \n" // if misaligned
5318 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5319 " movw r3, #:lower16:primary_map \n" //
5320 " uxth r1, r0 \n" // r1 = sec-map-offB
5321 " movt r3, #:upper16:primary_map \n" //
5322 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5323 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5324 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5325 " bne .LLV16LEc0 \n" // no, goto .LLV16LEc0
5326 ".LLV16LEh9: \n" //
5327 " mov r0, #0xFFFFFFFF \n" //
5328 " lsl r0, r0, #16 \n" // V_BITS16_DEFINED | top16safe
5329 " bx lr \n" //
5330 ".LLV16LEc0: \n" //
5331 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5332 " bne .LLV16LEc4 \n" //
5333 ".LLV16LEc2: \n" //
5334 " mov r0, #0xFFFFFFFF \n" // V_BITS16_UNDEFINED | top16safe
5335 " bx lr \n" //
5336 ".LLV16LEc4: \n" //
5337 // r1 holds sec-map-VABITS8. r0 holds the address and is 2-aligned.
5338 // Extract the relevant 4 bits and inspect.
5339 " and r2, r0, #2 \n" // addr & 2
5340 " add r2, r2, r2 \n" // 2 * (addr & 2)
5341 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 2))
5342 " and r1, r1, #15 \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
5344 " cmp r1, #0xA \n" // VA_BITS4_DEFINED
5345 " beq .LLV16LEh9 \n" //
5347 " cmp r1, #0x5 \n" // VA_BITS4_UNDEFINED
5348 " beq .LLV16LEc2 \n" //
5350 ".LLV16LEc12: \n" //
5351 " push {r4, lr} \n" //
5352 " mov r2, #0 \n" //
5353 " mov r1, #16 \n" //
5354 " bl mc_LOADVn_slow \n" //
5355 " pop {r4, pc} \n" //
5356 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5357 ".previous\n"
5360 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5361 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5362 __asm__(
5363 ".text\n"
5364 ".align 16\n"
5365 ".global vgMemCheck_helperc_LOADV16le\n"
5366 ".type vgMemCheck_helperc_LOADV16le, @function\n"
5367 "vgMemCheck_helperc_LOADV16le:\n"
5368 " test $0x1, %eax\n"
5369 " jne .LLV16LE5\n" /* jump if not aligned */
5370 " mov %eax, %edx\n"
5371 " shr $0x10, %edx\n"
5372 " mov primary_map(,%edx,4), %ecx\n"
5373 " movzwl %ax, %edx\n"
5374 " shr $0x2, %edx\n"
5375 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5376 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */
5377 " jne .LLV16LE2\n" /* jump if not all 32bits defined */
5378 ".LLV16LE1:\n"
5379 " mov $0xffff0000,%eax\n" /* V_BITS16_DEFINED | top16safe */
5380 " ret\n"
5381 ".LLV16LE2:\n"
5382 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5383 " jne .LLV16LE4\n" /* jump if not all 32bits undefined */
5384 ".LLV16LE3:\n"
5385 " or $0xffffffff,%eax\n" /* V_BITS16_UNDEFINED | top16safe */
5386 " ret\n"
5387 ".LLV16LE4:\n"
5388 " mov %eax, %ecx\n"
5389 " and $0x2, %ecx\n"
5390 " add %ecx, %ecx\n"
5391 " sar %cl, %edx\n"
5392 " and $0xf, %edx\n"
5393 " cmp $0xa, %edx\n"
5394 " je .LLV16LE1\n" /* jump if all 16bits are defined */
5395 " cmp $0x5, %edx\n"
5396 " je .LLV16LE3\n" /* jump if all 16bits are undefined */
5397 ".LLV16LE5:\n"
5398 " xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 16, 0) */
5399 " mov $16, %edx\n"
5400 " jmp mc_LOADVn_slow\n"
5401 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5402 ".previous\n"
5405 #else
5406 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5407 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
5409 return mc_LOADV16(a, False);
5411 #endif
5413 /*------------------------------------------------------------*/
5414 /*--- STOREV16 ---*/
5415 /*------------------------------------------------------------*/
5417 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
5418 static INLINE
5419 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
5421 UInt shift;
5422 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
5423 shift = (a & 2) << 1; // shift by 0 or 4
5424 vabits8 >>= shift; // shift the four bits to the bottom
5425 // check 2 x vabits2 != VA_BITS2_NOACCESS
5426 return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
5427 && ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
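/* Worked example for the helper above: vabits8 packs four 2-bit fields,
   one per byte of the containing word32, with byte k in bits [2k+1:2k].
   For a 2-aligned address with (a & 2) == 2 the shift is 4, so the test
   inspects the fields for bytes 2 and 3; with (a & 2) == 0 it inspects
   those for bytes 0 and 1. */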
5430 static INLINE
5431 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
5433 PROF_EVENT(MCPE_STOREV16);
5435 #ifndef PERF_FAST_STOREV
5436 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5437 #else
5439 UWord sm_off, vabits8;
5440 SecMap* sm;
5442 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5443 PROF_EVENT(MCPE_STOREV16_SLOW1);
5444 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5445 return;
5448 sm = get_secmap_for_reading_low(a);
5449 sm_off = SM_OFF(a);
5450 vabits8 = sm->vabits8[sm_off];
5452 // To understand the below cleverness, see the extensive comments
5453 // in MC_(helperc_STOREV8).
5454 if (LIKELY(V_BITS16_DEFINED == vbits16)) {
5455 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5456 return;
5458 if (!is_distinguished_sm(sm)
5459 && accessible_vabits4_in_vabits8(a, vabits8)) {
5460 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
5461 &(sm->vabits8[sm_off]) );
5462 return;
5464 PROF_EVENT(MCPE_STOREV16_SLOW2);
5465 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5467 if (V_BITS16_UNDEFINED == vbits16) {
5468 if (vabits8 == VA_BITS8_UNDEFINED) {
5469 return;
5471 if (!is_distinguished_sm(sm)
5472 && accessible_vabits4_in_vabits8(a, vabits8)) {
5473 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
5474 &(sm->vabits8[sm_off]) );
5475 return;
5477 PROF_EVENT(MCPE_STOREV16_SLOW3);
5478 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5479 return;
5482 PROF_EVENT(MCPE_STOREV16_SLOW4);
5483 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5485 #endif
5489 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
5491 mc_STOREV16(a, vbits16, True);
5493 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
5495 mc_STOREV16(a, vbits16, False);
5498 /*------------------------------------------------------------*/
5499 /*--- LOADV8 ---*/
5500 /*------------------------------------------------------------*/
5502 /* Note: endianness is irrelevant for size == 1 */
5504 // Non-generic assembly for arm32-linux
5505 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5506 && defined(VGP_arm_linux)
5507 __asm__( /* Derived from NCode template */
5508 ".text \n"
5509 ".align 2 \n"
5510 ".global vgMemCheck_helperc_LOADV8 \n"
5511 ".type vgMemCheck_helperc_LOADV8, %function \n"
5512 "vgMemCheck_helperc_LOADV8: \n" //
5513 " lsr r2, r0, #16 \n" // r2 = pri-map-ix
5514 " movw r3, #:lower16:primary_map \n" //
5515 " uxth r1, r0 \n" // r1 = sec-map-offB
5516 " movt r3, #:upper16:primary_map \n" //
5517 " ldr r2, [r3, r2, lsl #2] \n" // r2 = sec-map
5518 " ldrb r1, [r2, r1, lsr #2] \n" // r1 = sec-map-VABITS8
5519 " cmp r1, #0xAA \n" // r1 == VA_BITS8_DEFINED?
5520 " bne .LLV8c0 \n" // no, goto .LLV8c0
5521 ".LLV8h9: \n" //
5522 " mov r0, #0xFFFFFF00 \n" // V_BITS8_DEFINED | top24safe
5523 " bx lr \n" //
5524 ".LLV8c0: \n" //
5525 " cmp r1, #0x55 \n" // VA_BITS8_UNDEFINED
5526 " bne .LLV8c4 \n" //
5527 ".LLV8c2: \n" //
5528 " mov r0, #0xFFFFFFFF \n" // V_BITS8_UNDEFINED | top24safe
5529 " bx lr \n" //
5530 ".LLV8c4: \n" //
5531 // r1 holds sec-map-VABITS8
5532 // r0 holds the address. Extract the relevant 2 bits and inspect.
5533 " and r2, r0, #3 \n" // addr & 3
5534 " add r2, r2, r2 \n" // 2 * (addr & 3)
5535 " lsr r1, r1, r2 \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5536 " and r1, r1, #3 \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5538 " cmp r1, #2 \n" // VA_BITS2_DEFINED
5539 " beq .LLV8h9 \n" //
5541 " cmp r1, #1 \n" // VA_BITS2_UNDEFINED
5542 " beq .LLV8c2 \n" //
5544 " push {r4, lr} \n" //
5545 " mov r2, #0 \n" //
5546 " mov r1, #8 \n" //
5547 " bl mc_LOADVn_slow \n" //
5548 " pop {r4, pc} \n" //
5549 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5550 ".previous\n"
5553 /* Non-generic assembly for x86-linux and x86-solaris */
5554 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5555 && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5556 __asm__(
5557 ".text\n"
5558 ".align 16\n"
5559 ".global vgMemCheck_helperc_LOADV8\n"
5560 ".type vgMemCheck_helperc_LOADV8, @function\n"
5561 "vgMemCheck_helperc_LOADV8:\n"
5562 " mov %eax, %edx\n"
5563 " shr $0x10, %edx\n"
5564 " mov primary_map(,%edx,4), %ecx\n"
5565 " movzwl %ax, %edx\n"
5566 " shr $0x2, %edx\n"
5567 " movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5568 " cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED? */
5569 " jne .LLV8LE2\n" /* jump if not defined */
5570 ".LLV8LE1:\n"
5571 " mov $0xffffff00, %eax\n" /* V_BITS8_DEFINED | top24safe */
5572 " ret\n"
5573 ".LLV8LE2:\n"
5574 " cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */
5575 " jne .LLV8LE4\n" /* jump if not all 32bits are undefined */
5576 ".LLV8LE3:\n"
5577 " or $0xffffffff, %eax\n" /* V_BITS8_UNDEFINED | top24safe */
5578 " ret\n"
5579 ".LLV8LE4:\n"
5580 " mov %eax, %ecx\n"
5581 " and $0x3, %ecx\n"
5582 " add %ecx, %ecx\n"
5583 " sar %cl, %edx\n"
5584 " and $0x3, %edx\n"
5585 " cmp $0x2, %edx\n"
5586 " je .LLV8LE1\n" /* jump if all 8bits are defined */
5587 " cmp $0x1, %edx\n"
5588 " je .LLV8LE3\n" /* jump if all 8bits are undefined */
5589 " xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 8, 0) */
5590 " mov $0x8, %edx\n"
5591 " jmp mc_LOADVn_slow\n"
5592 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5593 ".previous\n"
5596 #else
5597 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5598 VG_REGPARM(1)
5599 UWord MC_(helperc_LOADV8) ( Addr a )
5601 PROF_EVENT(MCPE_LOADV8);
5603 #ifndef PERF_FAST_LOADV
5604 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5605 #else
5607 UWord sm_off, vabits8;
5608 SecMap* sm;
5610 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5611 PROF_EVENT(MCPE_LOADV8_SLOW1);
5612 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5615 sm = get_secmap_for_reading_low(a);
5616 sm_off = SM_OFF(a);
5617 vabits8 = sm->vabits8[sm_off];
5618 // Convert V bits from compact memory form to expanded register form
5619 // Handle common case quickly: a is mapped, and the entire
5620 // word32 it lives in is addressible.
5621 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; }
5622 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
5623 else {
5624 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
5625 // the single byte.
5626 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
5627 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; }
5628 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
5629 else {
5630 /* Slow case: the byte is not all-defined or all-undefined. */
5631 PROF_EVENT(MCPE_LOADV8_SLOW2);
5632 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5636 #endif
5638 #endif
5640 /*------------------------------------------------------------*/
5641 /*--- STOREV8 ---*/
5642 /*------------------------------------------------------------*/
5644 VG_REGPARM(2)
5645 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
5647 PROF_EVENT(MCPE_STOREV8);
5649 #ifndef PERF_FAST_STOREV
5650 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5651 #else
5653 UWord sm_off, vabits8;
5654 SecMap* sm;
5656 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5657 PROF_EVENT(MCPE_STOREV8_SLOW1);
5658 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5659 return;
5662 sm = get_secmap_for_reading_low(a);
5663 sm_off = SM_OFF(a);
5664 vabits8 = sm->vabits8[sm_off];
5666 // Clevernesses to speed up storing V bits.
5667 // The 64/32/16 bit cases also have similar clevernesses, but it
5668 // works a little differently to the code below.
5670 // Cleverness 1: sometimes we don't have to write the shadow memory at
5671 // all, if we can tell that what we want to write is the same as what is
5672 // already there. These cases are marked below as "defined on defined" and
5673 // "undefined on undefined".
5675 // Cleverness 2:
5676 // We also avoid calling mc_STOREVn_slow if the V bits can be written
5677 // directly in the secondary map. V bits can be written directly
5678 // if 4 conditions hold:
5679 // * The address for which V bits are written is naturally aligned
5680 // on 1 byte for STOREV8 (this is always true)
5681 // on 2 bytes for STOREV16
5682 // on 4 bytes for STOREV32
5683 // on 8 bytes for STOREV64.
5684 // * V bits being written are either fully defined or fully undefined.
5685 // (for partially defined V bits, V bits cannot be directly written,
5686 // as the secondary vbits table must be maintained).
5687 // * the secmap is not distinguished (distinguished maps cannot be
5688 // modified).
5689 // * the memory corresponding to the V bits being written is
5690 // accessible (if one or more bytes are not accessible,
5691 // we must call mc_STOREVn_slow in order to report accessibility
5692 // errors).
5693 // Note that for STOREV32 and STOREV64, it is too expensive
5694 // to verify the accessibility of each byte for the benefit it
5695 // brings. Instead, a quicker check is done by comparing to
5696 // VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
5697 // but misses some opportunities for direct modification.
5698 // Checking the accessibility of each byte was measured for
5699 // STOREV32 with the perf tests and slowed all of them down.
5700 // The cases corresponding to cleverness 2 are marked below as
5701 // "direct mod".
5702 if (LIKELY(V_BITS8_DEFINED == vbits8)) {
5703 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5704 return; // defined on defined
5706 if (!is_distinguished_sm(sm)
5707 && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
5708 // direct mod
5709 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
5710 &(sm->vabits8[sm_off]) );
5711 return;
5713 PROF_EVENT(MCPE_STOREV8_SLOW2);
5714 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5715 return;
5717 if (V_BITS8_UNDEFINED == vbits8) {
5718 if (vabits8 == VA_BITS8_UNDEFINED) {
5719 return; // undefined on undefined
5721 if (!is_distinguished_sm(sm)
5722 && (VA_BITS2_NOACCESS
5723 != extract_vabits2_from_vabits8(a, vabits8))) {
5724 // direct mod
5725 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
5726 &(sm->vabits8[sm_off]) );
5727 return;
5729 PROF_EVENT(MCPE_STOREV8_SLOW3);
5730 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5731 return;
5734 // Partially defined word
5735 PROF_EVENT(MCPE_STOREV8_SLOW4);
5736 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5738 #endif
5742 /*------------------------------------------------------------*/
5743 /*--- Functions called directly from generated code: ---*/
5744 /*--- Value-check failure handlers. ---*/
5745 /*------------------------------------------------------------*/
5747 /* Call these ones when an origin is available ... */
5748 VG_REGPARM(1)
5749 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
5750 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
5753 VG_REGPARM(1)
5754 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
5755 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
5758 VG_REGPARM(1)
5759 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
5760 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
5763 VG_REGPARM(1)
5764 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
5765 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
5768 VG_REGPARM(2)
5769 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
5770 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
5773 /* ... and these when an origin isn't available. */
5775 VG_REGPARM(0)
5776 void MC_(helperc_value_check0_fail_no_o) ( void ) {
5777 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
5780 VG_REGPARM(0)
5781 void MC_(helperc_value_check1_fail_no_o) ( void ) {
5782 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
5785 VG_REGPARM(0)
5786 void MC_(helperc_value_check4_fail_no_o) ( void ) {
5787 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
5790 VG_REGPARM(0)
5791 void MC_(helperc_value_check8_fail_no_o) ( void ) {
5792 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
5795 VG_REGPARM(1)
5796 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
5797 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
5801 /*------------------------------------------------------------*/
5802 /*--- Metadata get/set functions, for client requests. ---*/
5803 /*------------------------------------------------------------*/
5805 // Nb: this expands the V+A bits out into register-form V bits, even though
5806 // they're in memory. This is for backward compatibility, and because it's
5807 // probably what the user wants.
5809 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5810 error [no longer used], 3 == addressing error. */
5811 /* Nb: We used to issue various definedness/addressability errors from here,
5812 but we took them out because they ranged from not-very-helpful to
5813 downright annoying, and they complicated the error data structures. */
5814 static Int mc_get_or_set_vbits_for_client (
5815 Addr a,
5816 Addr vbits,
5817 SizeT szB,
5818 Bool setting, /* True <=> set vbits, False <=> get vbits */
5819 Bool is_client_request /* True <=> real user request
5820 False <=> internal call from gdbserver */
5823 SizeT i;
5824 Bool ok;
5825 UChar vbits8;
5827 /* Check that the arrays are addressible before doing any getting/setting.
5828 The vbits array is checked only for a real user request. */
5829 for (i = 0; i < szB; i++) {
5830 if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
5831 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
5832 return 3;
5836 /* Do the copy */
5837 if (setting) {
5838 /* setting */
5839 for (i = 0; i < szB; i++) {
5840 ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
5841 tl_assert(ok);
5843 } else {
5844 /* getting */
5845 for (i = 0; i < szB; i++) {
5846 ok = get_vbits8(a + i, &vbits8);
5847 tl_assert(ok);
5848 ((UChar*)vbits)[i] = vbits8;
5850 if (is_client_request)
5851 // The bytes in vbits[] have now been set, so mark them as such.
5852 MC_(make_mem_defined)(vbits, szB);
5855 return 1;
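/* Client-side view (a sketch): this code is reached via the memcheck.h
   client requests VALGRIND_GET_VBITS and VALGRIND_SET_VBITS, roughly

      unsigned char buf[8], vbits[8];
      VALGRIND_GET_VBITS(buf, vbits, 8);   // vbits[i] == 0x00 for defined
                                           // bytes of buf, 0xFF for
                                           // undefined ones
      ...
      VALGRIND_SET_VBITS(buf, vbits, 8);   // put them back later

   and the 1/3 codes above are what the request returns to the client
   (gdbserver calls arrive here too, with is_client_request False). */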
5859 /*------------------------------------------------------------*/
5860 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
5861 /*------------------------------------------------------------*/
5863 /* For the memory leak detector, say whether an entire 64k chunk of
5864 address space is possibly in use, or not. If in doubt return
5865 True.
5867 Bool MC_(is_within_valid_secondary) ( Addr a )
5869 SecMap* sm = maybe_get_secmap_for ( a );
5870 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
5871 /* Definitely not in use. */
5872 return False;
5873 } else {
5874 return True;
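/* Note that the distinguished DEFINED and UNDEFINED secondaries fall
   into the "possibly in use" case above: memory they describe is
   accessible, so the leak scanner must still look at it. */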
5879 /* For the memory leak detector, say whether or not a given word
5880 address is to be regarded as valid. */
5881 Bool MC_(is_valid_aligned_word) ( Addr a )
5883 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5884 tl_assert(VG_IS_WORD_ALIGNED(a));
5885 if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5886 return False;
5887 if (sizeof(UWord) == 8) {
5888 if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5889 return False;
5891 if (UNLIKELY(MC_(in_ignored_range)(a)))
5892 return False;
5893 else
5894 return True;
5898 /*------------------------------------------------------------*/
5899 /*--- Initialisation ---*/
5900 /*------------------------------------------------------------*/
5902 static void init_shadow_memory ( void )
5904 Int i;
5905 SecMap* sm;
5907 tl_assert(V_BIT_UNDEFINED == 1);
5908 tl_assert(V_BIT_DEFINED == 0);
5909 tl_assert(V_BITS8_UNDEFINED == 0xFF);
5910 tl_assert(V_BITS8_DEFINED == 0);
5912 /* Build the 3 distinguished secondaries */
5913 sm = &sm_distinguished[SM_DIST_NOACCESS];
5914 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5916 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5917 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5919 sm = &sm_distinguished[SM_DIST_DEFINED];
5920 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5922 /* Set up the primary map. */
5923 /* These entries gradually get overwritten as the used address
5924 space expands. */
5925 for (i = 0; i < N_PRIMARY_MAP; i++)
5926 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5928 /* Auxiliary primary maps */
5929 init_auxmap_L1_L2();
5931 /* auxmap_size = auxmap_used = 0;
5932 no ... these are statically initialised */
5934 /* Secondary V bit table */
5935 secVBitTable = createSecVBitTable();
5939 /*------------------------------------------------------------*/
5940 /*--- Sanity check machinery (permanently engaged) ---*/
5941 /*------------------------------------------------------------*/
5943 static Bool mc_cheap_sanity_check ( void )
5945 n_sanity_cheap++;
5946 PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
5947 /* Check for sane operating level */
5948 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5949 return False;
5950 /* nothing else useful we can rapidly check */
5951 return True;
5954 static Bool mc_expensive_sanity_check ( void )
5956 Int i;
5957 Word n_secmaps_found;
5958 SecMap* sm;
5959 const HChar* errmsg;
5960 Bool bad = False;
5962 if (0) VG_(printf)("expensive sanity check\n");
5963 if (0) return True;
5965 n_sanity_expensive++;
5966 PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);
5968 /* Check for sane operating level */
5969 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5970 return False;
5972 /* Check that the 3 distinguished SMs are still as they should be. */
5974 /* Check noaccess DSM. */
5975 sm = &sm_distinguished[SM_DIST_NOACCESS];
5976 for (i = 0; i < SM_CHUNKS; i++)
5977 if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5978 bad = True;
5980 /* Check undefined DSM. */
5981 sm = &sm_distinguished[SM_DIST_UNDEFINED];
5982 for (i = 0; i < SM_CHUNKS; i++)
5983 if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5984 bad = True;
5986 /* Check defined DSM. */
5987 sm = &sm_distinguished[SM_DIST_DEFINED];
5988 for (i = 0; i < SM_CHUNKS; i++)
5989 if (sm->vabits8[i] != VA_BITS8_DEFINED)
5990 bad = True;
5992 if (bad) {
5993 VG_(printf)("memcheck expensive sanity: "
5994 "distinguished_secondaries have changed\n");
5995 return False;
5998 /* If we're not checking for undefined value errors, the secondary V bit
5999 * table should be empty. */
6000 if (MC_(clo_mc_level) == 1) {
6001 if (0 != VG_(OSetGen_Size)(secVBitTable))
6002 return False;
6005 /* check the auxiliary maps, very thoroughly */
6006 n_secmaps_found = 0;
6007 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
6008 if (errmsg) {
6009 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
6010 return False;
6013 /* n_secmaps_found is now the number referred to by the auxiliary
6014 primary map. Now add on the ones referred to by the main
6015 primary map. */
6016 for (i = 0; i < N_PRIMARY_MAP; i++) {
6017 if (primary_map[i] == NULL) {
6018 bad = True;
6019 } else {
6020 if (!is_distinguished_sm(primary_map[i]))
6021 n_secmaps_found++;
6025 /* check that the number of secmaps issued matches the number that
6026 are reachable (iow, no secmap leaks) */
6027 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
6028 bad = True;
6030 if (bad) {
6031 VG_(printf)("memcheck expensive sanity: "
6032 "apparent secmap leakage\n");
6033 return False;
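/* NB: 'bad' is not recomputed between the check above and the one
   below, so as written the "auxmap covers wrong address space" branch
   cannot fire. */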
6036 if (bad) {
6037 VG_(printf)("memcheck expensive sanity: "
6038 "auxmap covers wrong address space\n");
6039 return False;
6042 /* there is only one pointer to each secmap (expensive) */
6044 return True;
6047 /*------------------------------------------------------------*/
6048 /*--- Command line args ---*/
6049 /*------------------------------------------------------------*/
6051 /* 31 Aug 2015: Vectorised code is now so widespread that
6052 --partial-loads-ok needs to be enabled by default on all platforms.
6053 Not doing so causes lots of false errors. */
6054 Bool MC_(clo_partial_loads_ok) = True;
6055 Long MC_(clo_freelist_vol) = 20LL*1000LL*1000LL;
6056 Long MC_(clo_freelist_big_blocks) = 1LL*1000LL*1000LL;
6057 LeakCheckMode MC_(clo_leak_check) = LC_Summary;
6058 VgRes MC_(clo_leak_resolution) = Vg_HighRes;
6059 UInt MC_(clo_show_leak_kinds) = R2S(Possible) | R2S(Unreached);
6060 UInt MC_(clo_error_for_leak_kinds) = R2S(Possible) | R2S(Unreached);
6061 UInt MC_(clo_leak_check_heuristics) = H2S(LchStdString)
6062 | H2S( LchLength64)
6063 | H2S( LchNewArray)
6064 | H2S( LchMultipleInheritance);
6065 Bool MC_(clo_xtree_leak) = False;
6066 const HChar* MC_(clo_xtree_leak_file) = "xtleak.kcg.%p";
6067 Bool MC_(clo_workaround_gcc296_bugs) = False;
6068 Int MC_(clo_malloc_fill) = -1;
6069 Int MC_(clo_free_fill) = -1;
6070 KeepStacktraces MC_(clo_keep_stacktraces) = KS_alloc_and_free;
6071 Int MC_(clo_mc_level) = 2;
6072 Bool MC_(clo_show_mismatched_frees) = True;
6073 Bool MC_(clo_show_realloc_size_zero) = True;
6075 ExpensiveDefinednessChecks
6076 MC_(clo_expensive_definedness_checks) = EdcAUTO;
6078 Bool MC_(clo_ignore_range_below_sp) = False;
6079 UInt MC_(clo_ignore_range_below_sp__first_offset) = 0;
6080 UInt MC_(clo_ignore_range_below_sp__last_offset) = 0;
6082 static const HChar * MC_(parse_leak_heuristics_tokens) =
6083 "-,stdstring,length64,newarray,multipleinheritance";
6084 /* The first heuristic value (LchNone) has no keyword, as this is
6085 a fake heuristic used to collect the blocks found without any
6086 heuristic. */
6088 static Bool mc_process_cmd_line_options(const HChar* arg)
6090 const HChar* tmp_str;
6091 Bool tmp_show;
6093 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6095 /* Set MC_(clo_mc_level):
6096 1 = A bit tracking only
6097 2 = A and V bit tracking, but no V bit origins
6098 3 = A and V bit tracking, and V bit origins
6100 Do this by inspecting --undef-value-errors= and
6101 --track-origins=. Reject the case --undef-value-errors=no
6102 --track-origins=yes as meaningless.
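/* Summary of the mapping implemented by the clauses just below:

      --undef-value-errors=no  --track-origins=no   -> level 1
      --undef-value-errors=yes --track-origins=no   -> level 2
      --undef-value-errors=yes --track-origins=yes  -> level 3
      --undef-value-errors=no  --track-origins=yes  -> rejected (bad_level)
*/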
6104 if VG_BOOL_CLO(arg, "--undef-value-errors", tmp_show) {
6105 if (tmp_show) {
6106 if (MC_(clo_mc_level) == 1)
6107 MC_(clo_mc_level) = 2;
6108 } else {
6109 if (MC_(clo_mc_level) == 3) {
6110 goto bad_level;
6111 } else {
6112 MC_(clo_mc_level) = 1;
6116 else if VG_BOOL_CLO(arg, "--track-origins", tmp_show) {
6117 if (tmp_show) {
6118 if (MC_(clo_mc_level) == 1) {
6119 goto bad_level;
6120 } else {
6121 MC_(clo_mc_level) = 3;
6123 } else {
6124 if (MC_(clo_mc_level) == 3)
6125 MC_(clo_mc_level) = 2;
6128 else if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
6129 else if VG_USET_CLOM(cloPD, arg, "--errors-for-leak-kinds",
6130 MC_(parse_leak_kinds_tokens),
6131 MC_(clo_error_for_leak_kinds)) {}
6132 else if VG_USET_CLOM(cloPD, arg, "--show-leak-kinds",
6133 MC_(parse_leak_kinds_tokens),
6134 MC_(clo_show_leak_kinds)) {}
6135 else if VG_USET_CLOM(cloPD, arg, "--leak-check-heuristics",
6136 MC_(parse_leak_heuristics_tokens),
6137 MC_(clo_leak_check_heuristics)) {}
6138 else if (VG_BOOL_CLOM(cloPD, arg, "--show-reachable", tmp_show)) {
6139 if (tmp_show) {
6140 MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
6141 } else {
6142 MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
6145 else if VG_BOOL_CLOM(cloPD, arg, "--show-possibly-lost", tmp_show) {
6146 if (tmp_show) {
6147 MC_(clo_show_leak_kinds) |= R2S(Possible);
6148 } else {
6149 MC_(clo_show_leak_kinds) &= ~R2S(Possible);
6152 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
6153 MC_(clo_workaround_gcc296_bugs)) {}
6155 else if VG_BINT_CLOM(cloPD, arg, "--freelist-vol", MC_(clo_freelist_vol),
6156 0, 10*1000*1000*1000LL) {}
6158 else if VG_BINT_CLOM(cloPD, arg, "--freelist-big-blocks",
6159 MC_(clo_freelist_big_blocks),
6160 0, 10*1000*1000*1000LL) {}
6162 else if VG_XACT_CLOM(cloPD, arg, "--leak-check=no",
6163 MC_(clo_leak_check), LC_Off) {}
6164 else if VG_XACT_CLOM(cloPD, arg, "--leak-check=summary",
6165 MC_(clo_leak_check), LC_Summary) {}
6166 else if VG_XACT_CLOM(cloPD, arg, "--leak-check=yes",
6167 MC_(clo_leak_check), LC_Full) {}
6168 else if VG_XACT_CLOM(cloPD, arg, "--leak-check=full",
6169 MC_(clo_leak_check), LC_Full) {}
6171 else if VG_XACT_CLO(arg, "--leak-resolution=low",
6172 MC_(clo_leak_resolution), Vg_LowRes) {}
6173 else if VG_XACT_CLO(arg, "--leak-resolution=med",
6174 MC_(clo_leak_resolution), Vg_MedRes) {}
6175 else if VG_XACT_CLO(arg, "--leak-resolution=high",
6176 MC_(clo_leak_resolution), Vg_HighRes) {}
6178 else if VG_STR_CLOM(cloPD, arg, "--ignore-ranges", tmp_str) {
6179 Bool ok = parse_ignore_ranges(tmp_str);
6180 if (!ok) {
6181 VG_(message)(Vg_DebugMsg,
6182 "ERROR: --ignore-ranges: "
6183 "invalid syntax, or end <= start in range\n");
6184 return False;
6186 if (gIgnoredAddressRanges) {
6187 UInt i;
6188 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
6189 UWord val = IAR_INVALID;
6190 UWord key_min = ~(UWord)0;
6191 UWord key_max = (UWord)0;
6192 VG_(indexRangeMap)( &key_min, &key_max, &val,
6193 gIgnoredAddressRanges, i );
6194 tl_assert(key_min <= key_max);
6195 UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
6196 if (key_max - key_min > limit && val == IAR_CommandLine) {
6197 VG_(message)(Vg_DebugMsg,
6198 "ERROR: --ignore-ranges: suspiciously large range:\n");
6199 VG_(message)(Vg_DebugMsg,
6200 " 0x%lx-0x%lx (size %lu)\n", key_min, key_max,
6201 key_max - key_min + 1);
6202 return False;
6208 else if VG_STR_CLOM(cloPD, arg, "--ignore-range-below-sp", tmp_str) {
6209 /* This seems at first a bit weird, but: in order to imply
6210 a non-wrapped-around address range, the first offset needs to be
6211 larger than the second one. For example
6212 --ignore-range-below-sp=8192,8189
6213 would cause accesses in the range [SP-8192, SP-8189] to be
6214 ignored. */
6215 UInt offs1 = 0, offs2 = 0;
6216 Bool ok = parse_UInt_pair(&tmp_str, &offs1, &offs2);
6217 // Ensure we used all the text after the '=' sign.
6218 if (ok && *tmp_str != 0) ok = False;
6219 if (!ok) {
6220 VG_(message)(Vg_DebugMsg,
6221 "ERROR: --ignore-range-below-sp: invalid syntax. "
6222 " Expected \"...=decimalnumber-decimalnumber\".\n");
6223 return False;
6225 if (offs1 > 1000*1000 /*arbitrary*/ || offs2 > 1000*1000 /*ditto*/) {
6226 VG_(message)(Vg_DebugMsg,
6227 "ERROR: --ignore-range-below-sp: suspiciously large "
6228 "offset(s): %u and %u\n", offs1, offs2);
6229 return False;
6231 if (offs1 <= offs2) {
6232 VG_(message)(Vg_DebugMsg,
6233 "ERROR: --ignore-range-below-sp: invalid offsets "
6234 "(the first must be larger): %u and %u\n", offs1, offs2);
6235 return False;
6237 tl_assert(offs1 > offs2);
6238 if (offs1 - offs2 > 4096 /*arbitrary*/) {
6239 VG_(message)(Vg_DebugMsg,
6240 "ERROR: --ignore-range-below-sp: suspiciously large "
6241 "range: %u-%u (size %u)\n", offs1, offs2, offs1 - offs2);
6242 return False;
6244 MC_(clo_ignore_range_below_sp) = True;
6245 MC_(clo_ignore_range_below_sp__first_offset) = offs1;
6246 MC_(clo_ignore_range_below_sp__last_offset) = offs2;
6247 return True;
6250 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
6251 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}
6253 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
6254 MC_(clo_keep_stacktraces), KS_alloc) {}
6255 else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
6256 MC_(clo_keep_stacktraces), KS_free) {}
6257 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
6258 MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
6259 else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
6260 MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
6261 else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
6262 MC_(clo_keep_stacktraces), KS_none) {}
6264 else if VG_BOOL_CLOM(cloPD, arg, "--show-mismatched-frees",
6265 MC_(clo_show_mismatched_frees)) {}
6266 else if VG_BOOL_CLOM(cloPD, arg, "--show-realloc-size-zero",
6267 MC_(clo_show_realloc_size_zero)) {}
6269 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=no",
6270 MC_(clo_expensive_definedness_checks), EdcNO) {}
6271 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=auto",
6272 MC_(clo_expensive_definedness_checks), EdcAUTO) {}
6273 else if VG_XACT_CLO(arg, "--expensive-definedness-checks=yes",
6274 MC_(clo_expensive_definedness_checks), EdcYES) {}
6276 else if VG_BOOL_CLO(arg, "--xtree-leak",
6277 MC_(clo_xtree_leak)) {}
6278 else if VG_STR_CLO (arg, "--xtree-leak-file",
6279 MC_(clo_xtree_leak_file)) {}
6281 else
6282 return VG_(replacement_malloc_process_cmd_line_option)(arg);
6284 return True;
6287 bad_level:
6288 VG_(fmsg_bad_option)(arg,
6289 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
6290 return False;
6293 static void mc_print_usage(void)
6295 VG_(printf)(
6296 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
6297 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
6298 " --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
6299 " [definite,possible]\n"
6300 " --errors-for-leak-kinds=kind1,kind2,.. which leak kinds are errors?\n"
6301 " [definite,possible]\n"
6302 " where kind is one of:\n"
6303 " definite indirect possible reachable all none\n"
6304 " --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
6305 " improving leak search false positives [all]\n"
6306 " where heur is one of:\n"
6307 " stdstring length64 newarray multipleinheritance all none\n"
6308 " --show-reachable=yes same as --show-leak-kinds=all\n"
6309 " --show-reachable=no --show-possibly-lost=yes\n"
6310 " same as --show-leak-kinds=definite,possible\n"
6311 " --show-reachable=no --show-possibly-lost=no\n"
6312 " same as --show-leak-kinds=definite\n"
6313 " --xtree-leak=no|yes output leak result in xtree format? [no]\n"
6314 " --xtree-leak-file=<file> xtree leak report file [xtleak.kcg.%%p]\n"
6315 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
6316 " --track-origins=no|yes show origins of undefined values? [no]\n"
6317 " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]\n"
6318 " --expensive-definedness-checks=no|auto|yes\n"
6319 " Use extra-precise definedness tracking [auto]\n"
6320 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
6321 " --freelist-big-blocks=<number> releases first the blocks with size >= [1000000]\n"
6322 " --workaround-gcc296-bugs=no|yes self-explanatory [no]. Deprecated.\n"
6323 " Use --ignore-range-below-sp instead.\n"
6324 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
6325 " --ignore-range-below-sp=<number>-<number> do not report errors for\n"
6326 " accesses at the given offsets below SP\n"
6327 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
6328 " --free-fill=<hexnumber> fill free'd areas with given value\n"
6329 " --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
6330 " stack trace(s) to keep for malloc'd/free'd areas [alloc-and-free]\n"
6331 " --show-mismatched-frees=no|yes show frees that don't match the allocator? [yes]\n"
6332 " --show-realloc-size-zero=no|yes show reallocs with a size of zero? [yes]\n"
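/* Illustrative only, not an exhaustive reference: a typical invocation
   combining several of the options listed above might look like

      valgrind --tool=memcheck --leak-check=full \
               --show-leak-kinds=definite,indirect --track-origins=yes ./prog

   where ./prog stands for the client program (a placeholder name). */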
6336 static void mc_print_debug_usage(void)
6338 VG_(printf)(
6339 " (none)\n"
6344 /*------------------------------------------------------------*/
6345 /*--- Client blocks ---*/
6346 /*------------------------------------------------------------*/
6348 /* Client block management:
6350 This is managed as an expanding array of client block descriptors.
6351 Indices of live descriptors are issued to the client, so it can ask
6352 to free them later. Therefore we cannot slide live entries down
6353 over dead ones. Instead we must use free/inuse flags and scan for
6354 an empty slot at allocation time. This in turn means allocation is
6355 relatively expensive, so we hope this does not happen too often.
6357 An unused block has start == size == 0
6360 /* type CGenBlock is defined in mc_include.h */
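/* For orientation only -- a sketch of the fields this file relies on, not
   the authoritative definition (that lives in mc_include.h):
      start  -- address of the described block (0 when the slot is unused)
      size   -- size in bytes (0 when the slot is unused)
      desc   -- user-supplied description string (strdup'd below)
      where  -- ExeContext recorded when the block was described
   The exact field types are an assumption here. */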
6362 /* This subsystem is self-initialising. */
6363 static UWord cgb_size = 0;
6364 static UWord cgb_used = 0;
6365 static CGenBlock* cgbs = NULL;
6367 /* Stats for this subsystem. */
6368 static ULong cgb_used_MAX = 0; /* Max in use. */
6369 static ULong cgb_allocs = 0; /* Number of allocs. */
6370 static ULong cgb_discards = 0; /* Number of discards. */
6371 static ULong cgb_search = 0; /* Number of searches. */
6374 /* Get access to the client block array. */
6375 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
6376 /*OUT*/UWord* nBlocks )
6378 *blocks = cgbs;
6379 *nBlocks = cgb_used;
6383 static
6384 Int alloc_client_block ( void )
6386 UWord i, sz_new;
6387 CGenBlock* cgbs_new;
6389 cgb_allocs++;
6391 for (i = 0; i < cgb_used; i++) {
6392 cgb_search++;
6393 if (cgbs[i].start == 0 && cgbs[i].size == 0)
6394 return i;
6397 /* Not found. Try to allocate one at the end. */
6398 if (cgb_used < cgb_size) {
6399 cgb_used++;
6400 return cgb_used-1;
6403 /* Ok, we have to allocate a new one. */
6404 tl_assert(cgb_used == cgb_size);
6405 sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
6407 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
6408 for (i = 0; i < cgb_used; i++)
6409 cgbs_new[i] = cgbs[i];
6411 if (cgbs != NULL)
6412 VG_(free)( cgbs );
6413 cgbs = cgbs_new;
6415 cgb_size = sz_new;
6416 cgb_used++;
6417 if (cgb_used > cgb_used_MAX)
6418 cgb_used_MAX = cgb_used;
6419 return cgb_used-1;
6423 static void show_client_block_stats ( void )
6425 VG_(message)(Vg_DebugMsg,
6426 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6427 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
6430 static void print_monitor_help ( void )
6432 VG_(gdb_printf)
6434 "\n"
6435 "memcheck monitor commands:\n"
6436 " xb <addr> [<len>]\n"
6437 " prints validity bits for <len> (or 1) bytes at <addr>\n"
6438 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6439 " Then prints the byte values below the corresponding validity bits\n"
6440 " in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6441 " Example: xb 0x8049c78 10\n"
6442 " get_vbits <addr> [<len>]\n"
6443 " Similar to xb, but only prints the validity bytes in groups of 4.\n"
6444 " make_memory [noaccess|undefined\n"
6445 " |defined|Definedifaddressable] <addr> [<len>]\n"
6446 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6447 " check_memory [addressable|defined] <addr> [<len>]\n"
6448 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6449 " and outputs a description of <addr>\n"
6450 " leak_check [full*|summary|xtleak]\n"
6451 " [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6452 " [heuristics heur1,heur2,...]\n"
6453 " [new|increased*|changed|any]\n"
6454 " [unlimited*|limited <max_loss_records_output>]\n"
6455 " * = defaults\n"
6456 " xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
6457 " where kind is one of:\n"
6458 " definite indirect possible reachable all none\n"
6459 " where heur is one of:\n"
6460 " stdstring length64 newarray multipleinheritance all none*\n"
6461 " Examples: leak_check\n"
6462 " leak_check summary any\n"
6463 " leak_check full kinds indirect,possible\n"
6464 " leak_check full reachable any limited 100\n"
6465 " block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6466 " [unlimited*|limited <max_blocks>]\n"
6467 " [heuristics heur1,heur2,...]\n"
6468 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
6469 " (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6470 " With heuristics, only shows the blocks found via heur1,heur2,...\n"
6471 " * = defaults\n"
6472 " who_points_at <addr> [<len>]\n"
6473 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
6474 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6475 " with len > 1, will also show \"interior pointers\")\n"
6476 " xtmemory [<filename>]\n"
6477 " dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
6478 "\n");
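/* Illustrative only: from a gdb session attached through vgdb, the commands
   above are issued with the "monitor" prefix, e.g.

      (gdb) monitor xb 0x8049c78 10
      (gdb) monitor leak_check full reachable any limited 100
      (gdb) monitor block_list 10 heuristics multipleinheritance

   These reuse the syntax documented in the help text above; the address and
   loss record number are made-up values. */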
6481 /* Print szB bytes at address, with a format similar to the gdb command
6482 x /<szB>xb address.
6483 res[i] == 1 indicates the corresponding byte is addressable. */
6484 static void gdb_xb (Addr address, SizeT szB, Int res[])
6486 UInt i;
6488 for (i = 0; i < szB; i++) {
6489 UInt bnr = i % 8;
6490 if (bnr == 0) {
6491 if (i != 0)
6492 VG_(printf) ("\n"); // Terminate previous line
6493 VG_(printf) ("%p:", (void*)(address+i));
6495 if (res[i] == 1)
6496 VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
6497 else
6498 VG_(printf) ("\t0x??");
6500 VG_(printf) ("\n"); // Terminate previous line
6504 /* Returns the address of the next non space character,
6505 or address of the string terminator. */
6506 static HChar* next_non_space (HChar *s)
6508 while (*s && *s == ' ')
6509 s++;
6510 return s;
6513 /* Parse an integer slice, i.e. a single integer or a range of integers.
6514 Syntax is:
6515 <integer>[..<integer> ]
6516 (spaces are allowed before and/or after ..).
6517 Return True if range correctly parsed, False otherwise. */
6518 static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
6519 UInt *from, UInt *to)
6521 HChar* wl;
6522 HChar *endptr;
6523 endptr = NULL;
6524 wl = VG_(strtok_r) (s, " ", saveptr);
6526 /* slice must start with an integer. */
6527 if (wl == NULL) {
6528 VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
6529 return False;
6531 *from = VG_(strtoull10) (wl, &endptr);
6532 if (endptr == wl) {
6533 VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
6534 return False;
6537 if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
6538 /* wl token is an integer terminating the string
6539 or else next token does not start with .
6540 In both cases, the slice is a single integer. */
6541 *to = *from;
6542 return True;
6545 if (*endptr == '\0') {
6546 // iii .. => get the next token
6547 wl = VG_(strtok_r) (NULL, " .", saveptr);
6548 } else {
6549 // It must be iii..
6550 if (*endptr != '.' || *(endptr+1) != '.') {
6551 VG_(gdb_printf) ("expecting slice <from>..<to>\n");
6552 return False;
6554 if ( *(endptr+2) == ' ') {
6555 // It must be iii.. jjj => get the next token
6556 wl = VG_(strtok_r) (NULL, " .", saveptr);
6557 } else {
6558 // It must be iii..jjj
6559 wl = endptr+2;
6563 *to = VG_(strtoull10) (wl, &endptr);
6564 if (*endptr != '\0') {
6565 VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
6566 return False;
6569 if (*from > *to) {
6570 VG_(gdb_printf) ("<from> cannot be bigger than <to> "
6571 "in slice <from>..<to>\n");
6572 return False;
6575 return True;
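/* Illustrative only -- inputs the parser above accepts:
      "42"       -> from = to = 42
      "5..10"    -> from = 5, to = 10
      "5 .. 10"  -> spaces around ".." are tolerated, same result
   whereas "10..5" is rejected because <from> may not exceed <to>. */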
6578 /* return True if request recognised, False otherwise */
6579 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
6581 HChar* wcmd;
6582 HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
6583 HChar *ssaveptr;
6585 VG_(strcpy) (s, req);
6587 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
6588 /* NB: if possible, avoid introducing a new command below which
6589 starts with the same first letter(s) as an already existing
6590 command. This ensures a shorter abbreviation for the user. */
6591 switch (VG_(keyword_id)
6592 ("help get_vbits leak_check make_memory check_memory "
6593 "block_list who_points_at xb xtmemory",
6594 wcmd, kwd_report_duplicated_matches)) {
6595 case -2: /* multiple matches */
6596 return True;
6597 case -1: /* not found */
6598 return False;
6599 case 0: /* help */
6600 print_monitor_help();
6601 return True;
6602 case 1: { /* get_vbits */
6603 Addr address;
6604 SizeT szB = 1;
6605 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6606 UChar vbits;
6607 Int i;
6608 Int unaddressable = 0;
6609 for (i = 0; i < szB; i++) {
6610 Int res = mc_get_or_set_vbits_for_client
6611 (address+i, (Addr) &vbits, 1,
6612 False, /* get them */
6613 False /* is client request */ );
6614 /* we are before the first character on next line, print a \n. */
6615 if ((i % 32) == 0 && i != 0)
6616 VG_(printf) ("\n");
6617 /* we are before the next block of 4 starts, print a space. */
6618 else if ((i % 4) == 0 && i != 0)
6619 VG_(printf) (" ");
6620 if (res == 1) {
6621 VG_(printf) ("%02x", vbits);
6622 } else {
6623 tl_assert(3 == res);
6624 unaddressable++;
6625 VG_(printf) ("__");
6628 VG_(printf) ("\n");
6629 if (unaddressable) {
6630 VG_(printf)
6631 ("Address %p len %lu has %d bytes unaddressable\n",
6632 (void *)address, szB, unaddressable);
6635 return True;
6637 case 2: { /* leak_check */
6638 Int err = 0;
6639 LeakCheckParams lcp;
6640 HChar* xt_filename = NULL;
6641 HChar* kw;
6643 lcp.mode = LC_Full;
6644 lcp.show_leak_kinds = R2S(Possible) | R2S(Unreached);
6645 lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
6646 lcp.heuristics = 0;
6647 lcp.deltamode = LCD_Increased;
6648 lcp.max_loss_records_output = 999999999;
6649 lcp.requested_by_monitor_command = True;
6650 lcp.xt_filename = NULL;
6652 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
6653 kw != NULL;
6654 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6655 switch (VG_(keyword_id)
6656 ("full summary xtleak "
6657 "kinds reachable possibleleak definiteleak "
6658 "heuristics "
6659 "new increased changed any "
6660 "unlimited limited ",
6661 kw, kwd_report_all)) {
6662 case -2: err++; break;
6663 case -1: err++; break;
6664 case 0: /* full */
6665 lcp.mode = LC_Full; break;
6666 case 1: /* summary */
6667 lcp.mode = LC_Summary; break;
6668 case 2: /* xtleak */
6669 lcp.mode = LC_Full;
6670 xt_filename
6671 = VG_(expand_file_name)("--xtleak-mc_main.c",
6672 "xtleak.kcg.%p.%n");
6673 lcp.xt_filename = xt_filename;
6674 break;
6675 case 3: { /* kinds */
6676 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6677 if (wcmd == NULL
6678 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
6679 True/*allow_all*/,
6680 wcmd,
6681 &lcp.show_leak_kinds)) {
6682 VG_(gdb_printf) ("missing or malformed leak kinds set\n");
6683 err++;
6685 break;
6687 case 4: /* reachable */
6688 lcp.show_leak_kinds = MC_(all_Reachedness)();
6689 break;
6690 case 5: /* possibleleak */
6691 lcp.show_leak_kinds
6692 = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
6693 break;
6694 case 6: /* definiteleak */
6695 lcp.show_leak_kinds = R2S(Unreached);
6696 break;
6697 case 7: { /* heuristics */
6698 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6699 if (wcmd == NULL
6700 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6701 True,/*allow_all*/
6702 wcmd,
6703 &lcp.heuristics)) {
6704 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6705 err++;
6707 break;
6709 case 8: /* new */
6710 lcp.deltamode = LCD_New; break;
6711 case 9: /* increased */
6712 lcp.deltamode = LCD_Increased; break;
6713 case 10: /* changed */
6714 lcp.deltamode = LCD_Changed; break;
6715 case 11: /* any */
6716 lcp.deltamode = LCD_Any; break;
6717 case 12: /* unlimited */
6718 lcp.max_loss_records_output = 999999999; break;
6719 case 13: { /* limited */
6720 Int int_value;
6721 const HChar* endptr;
6723 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6724 if (wcmd == NULL) {
6725 int_value = 0;
6726 endptr = "empty"; /* to report an error below */
6727 } else {
6728 HChar *the_end;
6729 int_value = VG_(strtoll10) (wcmd, &the_end);
6730 endptr = the_end;
6732 if (*endptr != '\0')
6733 VG_(gdb_printf) ("missing or malformed integer value\n");
6734 else if (int_value > 0)
6735 lcp.max_loss_records_output = (UInt) int_value;
6736 else
6737 VG_(gdb_printf) ("max_loss_records_output must be >= 1,"
6738 " got %d\n", int_value);
6739 break;
6741 default:
6742 tl_assert (0);
6745 if (!err)
6746 MC_(detect_memory_leaks)(tid, &lcp);
6747 if (xt_filename != NULL)
6748 VG_(free)(xt_filename);
6749 return True;
6752 case 3: { /* make_memory */
6753 Addr address;
6754 SizeT szB = 1;
6755 Int kwdid = VG_(keyword_id)
6756 ("noaccess undefined defined Definedifaddressable",
6757 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6758 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6759 return True;
6760 switch (kwdid) {
6761 case -2: break;
6762 case -1: break;
6763 case 0: MC_(make_mem_noaccess) (address, szB); break;
6764 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
6765 MC_OKIND_USER ); break;
6766 case 2: MC_(make_mem_defined) ( address, szB ); break;
6767 case 3: make_mem_defined_if_addressable ( address, szB ); break;
6768 default: tl_assert(0);
6770 return True;
6773 case 4: { /* check_memory */
6774 Addr address;
6775 SizeT szB = 1;
6776 Addr bad_addr;
6777 UInt okind;
6778 const HChar* src;
6779 UInt otag;
6780 UInt ecu;
6781 ExeContext* origin_ec;
6782 MC_ReadResult res;
6784 Int kwdid = VG_(keyword_id)
6785 ("addressable defined",
6786 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6787 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6788 return True;
6789 switch (kwdid) {
6790 case -2: break;
6791 case -1: break;
6792 case 0: /* addressable */
6793 if (is_mem_addressable ( address, szB, &bad_addr ))
6794 VG_(printf) ("Address %p len %lu addressable\n",
6795 (void *)address, szB);
6796 else
6797 VG_(printf)
6798 ("Address %p len %lu not addressable:\nbad address %p\n",
6799 (void *)address, szB, (void *) bad_addr);
6800 // Describe this (probably live) address with current epoch
6801 MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
6802 break;
6803 case 1: /* defined */
6804 res = is_mem_defined ( address, szB, &bad_addr, &otag );
6805 if (MC_AddrErr == res)
6806 VG_(printf)
6807 ("Address %p len %lu not addressable:\nbad address %p\n",
6808 (void *)address, szB, (void *) bad_addr);
6809 else if (MC_ValueErr == res) {
6810 okind = otag & 3;
6811 switch (okind) {
6812 case MC_OKIND_STACK:
6813 src = " was created by a stack allocation"; break;
6814 case MC_OKIND_HEAP:
6815 src = " was created by a heap allocation"; break;
6816 case MC_OKIND_USER:
6817 src = " was created by a client request"; break;
6818 case MC_OKIND_UNKNOWN:
6819 src = ""; break;
6820 default: tl_assert(0);
6822 VG_(printf)
6823 ("Address %p len %lu not defined:\n"
6824 "Uninitialised value at %p%s\n",
6825 (void *)address, szB, (void *) bad_addr, src);
6826 ecu = otag & ~3;
6827 if (VG_(is_plausible_ECU)(ecu)) {
6828 origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
6829 VG_(pp_ExeContext)( origin_ec );
6832 else
6833 VG_(printf) ("Address %p len %lu defined\n",
6834 (void *)address, szB);
6835 // Describe this (probably live) address with current epoch
6836 MC_(pp_describe_addr) (VG_(current_DiEpoch)(), address);
6837 break;
6838 default: tl_assert(0);
6840 return True;
6843 case 5: { /* block_list */
6844 HChar* wl;
6845 HChar *the_end;
6846 UInt lr_nr_from = 0;
6847 UInt lr_nr_to = 0;
6849 if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
6850 UInt limit_blocks = 999999999;
6851 Int int_value;
6852 UInt heuristics = 0;
6854 for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
6855 wl != NULL;
6856 wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6857 switch (VG_(keyword_id) ("unlimited limited heuristics ",
6858 wl, kwd_report_all)) {
6859 case -2: return True;
6860 case -1: return True;
6861 case 0: /* unlimited */
6862 limit_blocks = 999999999; break;
6863 case 1: /* limited */
6864 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6865 if (wcmd == NULL) {
6866 VG_(gdb_printf) ("missing integer value\n");
6867 return True;
6869 int_value = VG_(strtoll10) (wcmd, &the_end);
6870 if (*the_end != '\0') {
6871 VG_(gdb_printf) ("malformed integer value\n");
6872 return True;
6874 if (int_value <= 0) {
6875 VG_(gdb_printf) ("max_blocks must be >= 1,"
6876 " got %d\n", int_value);
6877 return True;
6879 limit_blocks = (UInt) int_value;
6880 break;
6881 case 2: /* heuristics */
6882 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6883 if (wcmd == NULL
6884 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6885 True,/*allow_all*/
6886 wcmd,
6887 &heuristics)) {
6888 VG_(gdb_printf) ("missing or malformed heuristics set\n");
6889 return True;
6891 break;
6892 default:
6893 tl_assert (0);
6896 /* subtract 1 from lr_nr_from/lr_nr_to, as what is shown to the user
6897 is 1 more than the index in lr_array. */
6898 if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
6899 lr_nr_to-1,
6900 limit_blocks,
6901 heuristics))
6902 VG_(gdb_printf) ("invalid loss record nr\n");
6904 return True;
6907 case 6: { /* who_points_at */
6908 Addr address;
6909 SizeT szB = 1;
6911 if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6912 return True;
6913 if (address == (Addr) 0) {
6914 VG_(gdb_printf) ("Cannot search who points at 0x0\n");
6915 return True;
6917 MC_(who_points_at) (address, szB);
6918 return True;
6921 case 7: { /* xb */
6922 Addr address;
6923 SizeT szB = 1;
6924 if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6925 UChar vbits[8];
6926 Int res[8];
6927 Int i;
6928 Int unaddressable = 0;
6929 for (i = 0; i < szB; i++) {
6930 Int bnr = i % 8;
6931 /* We are going to print the first vabits of a new line.
6932 Terminate the previous line if needed: prints a line with the
6933 address and the data. */
6934 if (bnr == 0) {
6935 if (i != 0) {
6936 VG_(printf) ("\n");
6937 gdb_xb (address + i - 8, 8, res);
6939 VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
6941 res[bnr] = mc_get_or_set_vbits_for_client
6942 (address+i, (Addr) &vbits[bnr], 1,
6943 False, /* get them */
6944 False /* is client request */ );
6945 if (res[bnr] == 1) {
6946 VG_(printf) ("\t %02x", vbits[bnr]);
6947 } else {
6948 tl_assert(3 == res[bnr]);
6949 unaddressable++;
6950 VG_(printf) ("\t __");
6953 VG_(printf) ("\n");
6954 if (szB % 8 == 0 && szB > 0)
6955 gdb_xb (address + szB - 8, 8, res);
6956 else
6957 gdb_xb (address + szB - szB % 8, szB % 8, res);
6958 if (unaddressable) {
6959 VG_(printf)
6960 ("Address %p len %lu has %d bytes unaddressable\n",
6961 (void *)address, szB, unaddressable);
6964 return True;
6967 case 8: { /* xtmemory */
6968 HChar* filename;
6969 filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
6970 MC_(xtmemory_report)(filename, False);
6971 return True;
6974 default:
6975 tl_assert(0);
6976 return False;
6980 /*------------------------------------------------------------*/
6981 /*--- Client requests ---*/
6982 /*------------------------------------------------------------*/
6984 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
6986 Int i;
6987 Addr bad_addr;
6988 MC_Chunk* mc = NULL;
6990 if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
6991 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
6992 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
6993 && VG_USERREQ__FREELIKE_BLOCK != arg[0]
6994 && VG_USERREQ__CREATE_MEMPOOL != arg[0]
6995 && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
6996 && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
6997 && VG_USERREQ__MEMPOOL_FREE != arg[0]
6998 && VG_USERREQ__MEMPOOL_TRIM != arg[0]
6999 && VG_USERREQ__MOVE_MEMPOOL != arg[0]
7000 && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
7001 && VG_USERREQ__MEMPOOL_EXISTS != arg[0]
7002 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0]
7003 && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
7004 && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
7005 return False;
7007 switch (arg[0]) {
7008 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
7009 Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
7010 if (!ok)
7011 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
7012 *ret = ok ? (UWord)NULL : bad_addr;
7013 break;
7016 case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
7017 Bool errorV = False;
7018 Addr bad_addrV = 0;
7019 UInt otagV = 0;
7020 Bool errorA = False;
7021 Addr bad_addrA = 0;
7022 is_mem_defined_comprehensive(
7023 arg[1], arg[2],
7024 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
7026 if (errorV) {
7027 MC_(record_user_error) ( tid, bad_addrV,
7028 /*isAddrErr*/False, otagV );
7030 if (errorA) {
7031 MC_(record_user_error) ( tid, bad_addrA,
7032 /*isAddrErr*/True, 0 );
7034 /* Return the lower of the two erring addresses, if any. */
7035 *ret = 0;
7036 if (errorV && !errorA) {
7037 *ret = bad_addrV;
7039 if (!errorV && errorA) {
7040 *ret = bad_addrA;
7042 if (errorV && errorA) {
7043 *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
7045 break;
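      /* Illustrative only: client code normally reaches this case through
         the memcheck.h macro, roughly

            VALGRIND_CHECK_MEM_IS_DEFINED(buf, len);

         (buf/len are placeholders) which yields 0 when everything is
         defined, or, as computed above, the lowest offending address. */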
7048 case VG_USERREQ__DO_LEAK_CHECK: {
7049 LeakCheckParams lcp;
7051 if (arg[1] == 0)
7052 lcp.mode = LC_Full;
7053 else if (arg[1] == 1)
7054 lcp.mode = LC_Summary;
7055 else {
7056 VG_(message)(Vg_UserMsg,
7057 "Warning: unknown memcheck leak search mode\n");
7058 lcp.mode = LC_Full;
7061 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
7062 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
7063 lcp.heuristics = MC_(clo_leak_check_heuristics);
7065 if (arg[2] == 0)
7066 lcp.deltamode = LCD_Any;
7067 else if (arg[2] == 1)
7068 lcp.deltamode = LCD_Increased;
7069 else if (arg[2] == 2)
7070 lcp.deltamode = LCD_Changed;
7071 else if (arg[2] == 3)
7072 lcp.deltamode = LCD_New;
7073 else {
7074 VG_(message)
7075 (Vg_UserMsg,
7076 "Warning: unknown memcheck leak search deltamode\n");
7077 lcp.deltamode = LCD_Any;
7079 lcp.max_loss_records_output = 999999999;
7080 lcp.requested_by_monitor_command = False;
7081 lcp.xt_filename = NULL;
7083 MC_(detect_memory_leaks)(tid, &lcp);
7084 *ret = 0; /* return value is meaningless */
7085 break;
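      /* Illustrative only: arg[1]/arg[2] are normally set by the memcheck.h
         macros (see memcheck.h for the authoritative encodings), e.g.

            VALGRIND_DO_LEAK_CHECK;        // full check, deltamode "any"
            VALGRIND_DO_QUICK_LEAK_CHECK;  // summary only
            VALGRIND_DO_ADDED_LEAK_CHECK;  // full check, increased leaks only

         which map onto the arg[1]/arg[2] decoding handled above. */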
7088 case VG_USERREQ__MAKE_MEM_NOACCESS:
7089 MC_(make_mem_noaccess) ( arg[1], arg[2] );
7090 *ret = -1;
7091 break;
7093 case VG_USERREQ__MAKE_MEM_UNDEFINED:
7094 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
7095 MC_OKIND_USER );
7096 *ret = -1;
7097 break;
7099 case VG_USERREQ__MAKE_MEM_DEFINED:
7100 MC_(make_mem_defined) ( arg[1], arg[2] );
7101 *ret = -1;
7102 break;
7104 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
7105 make_mem_defined_if_addressable ( arg[1], arg[2] );
7106 *ret = -1;
7107 break;
7109 case VG_USERREQ__CREATE_BLOCK: /* describe a block */
7110 if (arg[1] != 0 && arg[2] != 0) {
7111 i = alloc_client_block();
7112 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
7113 cgbs[i].start = arg[1];
7114 cgbs[i].size = arg[2];
7115 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
7116 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
7117 *ret = i;
7118 } else
7119 *ret = -1;
7120 break;
7122 case VG_USERREQ__DISCARD: /* discard */
7123 if (cgbs == NULL
7124 || arg[2] >= cgb_used ||
7125 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
7126 *ret = 1;
7127 } else {
7128 tl_assert(arg[2] < cgb_used);
7129 cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
7130 VG_(free)(cgbs[arg[2]].desc);
7131 cgb_discards++;
7132 *ret = 0;
7134 break;
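      /* Illustrative only: the CREATE_BLOCK/DISCARD pair above is driven
         from client code via the memcheck.h macros, along the lines of

            int id = VALGRIND_CREATE_BLOCK(addr, nbytes, "my buffer");
            ...
            VALGRIND_DISCARD(id);

         where addr, nbytes and the description string are placeholders. */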
7136 case VG_USERREQ__GET_VBITS:
7137 *ret = mc_get_or_set_vbits_for_client
7138 ( arg[1], arg[2], arg[3],
7139 False /* get them */,
7140 True /* is client request */ );
7141 break;
7143 case VG_USERREQ__SET_VBITS:
7144 *ret = mc_get_or_set_vbits_for_client
7145 ( arg[1], arg[2], arg[3],
7146 True /* set them */,
7147 True /* is client request */ );
7148 break;
7150 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
7151 UWord** argp = (UWord**)arg;
7152 // MC_(bytes_leaked) et al were set by the last leak check (or zero
7153 // if no prior leak checks performed).
7154 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
7155 *argp[2] = MC_(bytes_dubious);
7156 *argp[3] = MC_(bytes_reachable);
7157 *argp[4] = MC_(bytes_suppressed);
7158 // there is no argp[5]
7159 //*argp[5] = MC_(bytes_indirect);
7160 // XXX need to make *argp[1-4] defined; currently done in the
7161 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
7162 *ret = 0;
7163 return True;
7165 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
7166 UWord** argp = (UWord**)arg;
7167 // MC_(blocks_leaked) et al were set by the last leak check (or zero
7168 // if no prior leak checks performed).
7169 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
7170 *argp[2] = MC_(blocks_dubious);
7171 *argp[3] = MC_(blocks_reachable);
7172 *argp[4] = MC_(blocks_suppressed);
7173 // there is no argp[5]
7174 //*argp[5] = MC_(blocks_indirect);
7175 // XXX need to make *argp[1-4] defined; currently done in the
7176 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
7177 *ret = 0;
7178 return True;
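      /* Illustrative only: both counting requests are normally issued via
         the memcheck.h helpers after a leak check, e.g.

            unsigned long leaked, dubious, reachable, suppressed;
            VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);

         VALGRIND_COUNT_LEAK_BLOCKS has the same shape but fills in block
         counts rather than byte counts. */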
7180 case VG_USERREQ__MALLOCLIKE_BLOCK: {
7181 Addr p = (Addr)arg[1];
7182 SizeT sizeB = arg[2];
7183 UInt rzB = arg[3];
7184 Bool is_zeroed = (Bool)arg[4];
7186 MC_(new_block) ( tid, p, sizeB, /*ignored*/0U, 0U, is_zeroed,
7187 MC_AllocCustom, MC_(malloc_list) );
7188 if (rzB > 0) {
7189 MC_(make_mem_noaccess) ( p - rzB, rzB);
7190 MC_(make_mem_noaccess) ( p + sizeB, rzB);
7192 return True;
7194 case VG_USERREQ__RESIZEINPLACE_BLOCK: {
7195 Addr p = (Addr)arg[1];
7196 SizeT oldSizeB = arg[2];
7197 SizeT newSizeB = arg[3];
7198 UInt rzB = arg[4];
7200 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
7201 return True;
7203 case VG_USERREQ__FREELIKE_BLOCK: {
7204 Addr p = (Addr)arg[1];
7205 UInt rzB = arg[2];
7207 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
7208 return True;
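      /* Illustrative only -- a sketch of how a custom allocator might
         announce its blocks using the valgrind.h macros handled above
         (my_pool_alloc/my_pool_free, p, sz and rzB are placeholders):

            p = my_pool_alloc(sz + 2*rzB);
            VALGRIND_MALLOCLIKE_BLOCK(p + rzB, sz, rzB, 0);  // is_zeroed = 0
            ...
            VALGRIND_FREELIKE_BLOCK(p + rzB, rzB);
            my_pool_free(p);
      */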
7211 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
7212 HChar* s = (HChar*)arg[1];
7213 Addr dst = (Addr) arg[2];
7214 Addr src = (Addr) arg[3];
7215 SizeT len = (SizeT)arg[4];
7216 MC_(record_overlap_error)(tid, s, src, dst, len);
7217 return True;
7220 case _VG_USERREQ__MEMCHECK_VERIFY_ALIGNMENT: {
7221 struct AlignedAllocInfo *aligned_alloc_info = (struct AlignedAllocInfo *)arg[1];
7222 tl_assert(aligned_alloc_info);
7224 switch (aligned_alloc_info->alloc_kind) {
7225 case AllocKindMemalign:
7226 // other platforms just ensure it is a power of 2
7227 // (ignore that Illumos enforces only a multiple of 4, which is probably a bug)
7228 if (aligned_alloc_info->orig_alignment == 0U ||
7229 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7230 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be power of 2)" );
7232 // size zero not allowed on all platforms (e.g. Illumos)
7233 if (aligned_alloc_info->size == 0) {
7234 MC_(record_bad_size) ( tid, aligned_alloc_info->size, "memalign()" );
7236 break;
7237 case AllocKindPosixMemalign:
7238 // must be power of 2
7239 // alignment at least sizeof(size_t)
7240 // size of 0 is implementation-defined
7241 if (aligned_alloc_info->orig_alignment < sizeof(SizeT) ||
7242 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7243 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero, a power of 2 and a multiple of sizeof(void*))" );
7245 if (aligned_alloc_info->size == 0) {
7246 MC_(record_bad_size) ( tid, aligned_alloc_info->size, "posix_memalign()" );
7248 break;
7249 case AllocKindAlignedAlloc:
7250 // must be power of 2
7251 if ((aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7252 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be a power of 2)" );
7254 // size should be integral multiple of alignment
7255 if (aligned_alloc_info->orig_alignment &&
7256 aligned_alloc_info->size % aligned_alloc_info->orig_alignment != 0U) {
7257 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , aligned_alloc_info->size, " (size should be a multiple of alignment)" );
7259 if (aligned_alloc_info->size == 0) {
7260 MC_(record_bad_size) ( tid, aligned_alloc_info->size, "aligned_alloc()" );
7262 break;
7263 case AllocKindDeleteSized:
7264 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7265 if (mc && mc->szB != aligned_alloc_info->size) {
7266 MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new/delete" );
7268 break;
7269 case AllocKindVecDeleteSized:
7270 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7271 if (mc && mc->szB != aligned_alloc_info->size) {
7272 MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new[]/delete[]" );
7274 break;
7275 case AllocKindNewAligned:
7276 if (aligned_alloc_info->orig_alignment == 0 ||
7277 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7278 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7280 break;
7281 case AllocKindVecNewAligned:
7282 if (aligned_alloc_info->orig_alignment == 0 ||
7283 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7284 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7286 break;
7287 case AllocKindDeleteDefault:
7288 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7289 if (mc && mc->alignB) {
7290 MC_(record_align_mismatch_error) ( tid, mc, 0U, True, "new/delete");
7292 break;
7293 case AllocKindDeleteAligned:
7294 if (aligned_alloc_info->orig_alignment == 0 ||
7295 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7296 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7298 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7299 if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
7300 MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, False, "new/delete");
7302 break;
7303 case AllocKindVecDeleteDefault:
7304 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7305 if (mc && mc->alignB) {
7306 MC_(record_align_mismatch_error) ( tid, mc, 0U, True, "new[]/delete[]");
7308 break;
7309 case AllocKindVecDeleteAligned:
7310 if (aligned_alloc_info->orig_alignment == 0 ||
7311 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7312 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7314 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7315 if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
7316 MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, False, "new[]/delete[]");
7318 break;
7319 case AllocKindDeleteSizedAligned:
7320 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7321 if (mc && mc->szB != aligned_alloc_info->size) {
7322 MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new/delete");
7324 if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
7325 MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, False, "new/delete");
7327 if (aligned_alloc_info->orig_alignment == 0 ||
7328 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7329 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7331 break;
7332 case AllocKindVecDeleteSizedAligned:
7333 mc = VG_(HT_lookup) ( MC_(malloc_list), (UWord)aligned_alloc_info->mem );
7334 if (mc && mc->szB != aligned_alloc_info->size) {
7335 MC_(record_size_mismatch_error) ( tid, mc, aligned_alloc_info->size, "new[]/delete[]" );
7337 if (mc && aligned_alloc_info->orig_alignment != mc->alignB) {
7338 MC_(record_align_mismatch_error) ( tid, mc, aligned_alloc_info->orig_alignment, False, "new[]/delete[]");
7340 if (aligned_alloc_info->orig_alignment == 0 ||
7341 (aligned_alloc_info->orig_alignment & (aligned_alloc_info->orig_alignment - 1)) != 0) {
7342 MC_(record_bad_alignment) ( tid, aligned_alloc_info->orig_alignment , 0U, " (should be non-zero and a power of 2)" );
7344 break;
7345 default:
7346 tl_assert (False);
7349 return True;
7352 case VG_USERREQ__CREATE_MEMPOOL: {
7353 Addr pool = (Addr)arg[1];
7354 UInt rzB = arg[2];
7355 Bool is_zeroed = (Bool)arg[3];
7356 UInt flags = arg[4];
7358 // The create_mempool function does not know about these mempool flags,
7359 // so pass them as booleans.
7360 MC_(create_mempool) ( pool, rzB, is_zeroed,
7361 (flags & VALGRIND_MEMPOOL_AUTO_FREE),
7362 (flags & VALGRIND_MEMPOOL_METAPOOL) );
7363 return True;
7366 case VG_USERREQ__DESTROY_MEMPOOL: {
7367 Addr pool = (Addr)arg[1];
7369 MC_(destroy_mempool) ( pool );
7370 return True;
7373 case VG_USERREQ__MEMPOOL_ALLOC: {
7374 Addr pool = (Addr)arg[1];
7375 Addr addr = (Addr)arg[2];
7376 UInt size = arg[3];
7378 MC_(mempool_alloc) ( tid, pool, addr, size );
7379 return True;
7382 case VG_USERREQ__MEMPOOL_FREE: {
7383 Addr pool = (Addr)arg[1];
7384 Addr addr = (Addr)arg[2];
7386 MC_(mempool_free) ( pool, addr );
7387 return True;
7390 case VG_USERREQ__MEMPOOL_TRIM: {
7391 Addr pool = (Addr)arg[1];
7392 Addr addr = (Addr)arg[2];
7393 UInt size = arg[3];
7395 MC_(mempool_trim) ( pool, addr, size );
7396 return True;
7399 case VG_USERREQ__MOVE_MEMPOOL: {
7400 Addr poolA = (Addr)arg[1];
7401 Addr poolB = (Addr)arg[2];
7403 MC_(move_mempool) ( poolA, poolB );
7404 return True;
7407 case VG_USERREQ__MEMPOOL_CHANGE: {
7408 Addr pool = (Addr)arg[1];
7409 Addr addrA = (Addr)arg[2];
7410 Addr addrB = (Addr)arg[3];
7411 UInt size = arg[4];
7413 MC_(mempool_change) ( pool, addrA, addrB, size );
7414 return True;
7417 case VG_USERREQ__MEMPOOL_EXISTS: {
7418 Addr pool = (Addr)arg[1];
7420 *ret = (UWord) MC_(mempool_exists) ( pool );
7421 return True;
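      /* Illustrative only -- the mempool requests above correspond to the
         valgrind.h macros; a minimal lifecycle sketch, with pool, obj, sz
         and rzB as placeholders:

            VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed);
            VALGRIND_MEMPOOL_ALLOC(pool, obj, sz);
            VALGRIND_MEMPOOL_FREE(pool, obj);
            VALGRIND_DESTROY_MEMPOOL(pool);
      */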
7424 case VG_USERREQ__GDB_MONITOR_COMMAND: {
7425 Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
7426 if (handled)
7427 *ret = 1;
7428 else
7429 *ret = 0;
7430 return handled;
7433 case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
7434 case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
7435 Bool addRange
7436 = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
7437 Bool ok
7438 = modify_ignore_ranges(addRange, arg[1], arg[2]);
7439 *ret = ok ? 1 : 0;
7440 return True;
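      /* Illustrative only: clients toggle this via the memcheck.h macros,
         e.g.

            VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(buf, len);
            ... deliberately wild accesses ...
            VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(buf, len);

         with buf/len standing in for the real range. */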
7443 default:
7444 VG_(message)(Vg_UserMsg,
7445 "Warning: unknown memcheck client request code %llx\n",
7446 (ULong)arg[0]);
7447 return False;
7449 return True;
7453 /*------------------------------------------------------------*/
7454 /*--- Crude profiling machinery. ---*/
7455 /*------------------------------------------------------------*/
7457 // We track a number of interesting events (using PROF_EVENT)
7458 // if MC_PROFILE_MEMORY is defined.
7460 #ifdef MC_PROFILE_MEMORY
7462 ULong MC_(event_ctr)[MCPE_LAST];
7464 /* Event counter names. Use the name of the function that increases the
7465 event counter. Drop any MC_() and mc_ prefixes. */
7466 static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
7467 [MCPE_LOADVN_SLOW] = "LOADVn_slow",
7468 [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
7469 [MCPE_STOREVN_SLOW] = "STOREVn_slow",
7470 [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
7471 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
7472 [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
7473 "make_aligned_word32_undefined_slow",
7474 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
7475 [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
7476 "make_aligned_word64_undefined_slow",
7477 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
7478 [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
7479 "make_aligned_word32_noaccess_slow",
7480 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
7481 [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
7482 "make_aligned_word64_noaccess_slow",
7483 [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
7484 [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
7485 [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
7486 [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
7487 [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
7488 [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
7489 [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
7490 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
7491 [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
7492 [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
7493 [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
7494 [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
7495 [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
7496 [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
7497 [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
7498 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
7499 [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
7500 "is_mem_defined_comprehensive(loop)",
7501 [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
7502 [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
7503 [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
7504 [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
7505 [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
7506 [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
7507 "set_address_range_perms(single-secmap)",
7508 [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
7509 "set_address_range_perms(startof-secmap)",
7510 [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
7511 "set_address_range_perms(multiple-secmaps)",
7512 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
7513 "set_address_range_perms(dist-sm1)",
7514 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
7515 "set_address_range_perms(dist-sm2)",
7516 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
7517 "set_address_range_perms(dist-sm1-quick)",
7518 [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
7519 "set_address_range_perms(dist-sm2-quick)",
7520 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
7521 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
7522 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
7523 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
7524 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
7525 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
7526 [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
7527 "set_address_range_perms(loop64K-free-dist-sm)",
7528 [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
7529 [MCPE_LOADV_128_OR_256] = "LOADV_128_or_256",
7530 [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
7531 [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
7532 [MCPE_LOADV64] = "LOADV64",
7533 [MCPE_LOADV64_SLOW1] = "LOADV64-slow1",
7534 [MCPE_LOADV64_SLOW2] = "LOADV64-slow2",
7535 [MCPE_STOREV64] = "STOREV64",
7536 [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
7537 [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
7538 [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
7539 [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
7540 [MCPE_LOADV32] = "LOADV32",
7541 [MCPE_LOADV32_SLOW1] = "LOADV32-slow1",
7542 [MCPE_LOADV32_SLOW2] = "LOADV32-slow2",
7543 [MCPE_STOREV32] = "STOREV32",
7544 [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
7545 [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
7546 [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
7547 [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
7548 [MCPE_LOADV16] = "LOADV16",
7549 [MCPE_LOADV16_SLOW1] = "LOADV16-slow1",
7550 [MCPE_LOADV16_SLOW2] = "LOADV16-slow2",
7551 [MCPE_STOREV16] = "STOREV16",
7552 [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
7553 [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
7554 [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
7555 [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
7556 [MCPE_LOADV8] = "LOADV8",
7557 [MCPE_LOADV8_SLOW1] = "LOADV8-slow1",
7558 [MCPE_LOADV8_SLOW2] = "LOADV8-slow2",
7559 [MCPE_STOREV8] = "STOREV8",
7560 [MCPE_STOREV8_SLOW1] = "STOREV8-slow1",
7561 [MCPE_STOREV8_SLOW2] = "STOREV8-slow2",
7562 [MCPE_STOREV8_SLOW3] = "STOREV8-slow3",
7563 [MCPE_STOREV8_SLOW4] = "STOREV8-slow4",
7564 [MCPE_NEW_MEM_STACK_4] = "new_mem_stack_4",
7565 [MCPE_NEW_MEM_STACK_8] = "new_mem_stack_8",
7566 [MCPE_NEW_MEM_STACK_12] = "new_mem_stack_12",
7567 [MCPE_NEW_MEM_STACK_16] = "new_mem_stack_16",
7568 [MCPE_NEW_MEM_STACK_32] = "new_mem_stack_32",
7569 [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
7570 [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
7571 [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
7572 [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
7573 [MCPE_DIE_MEM_STACK_4] = "die_mem_stack_4",
7574 [MCPE_DIE_MEM_STACK_8] = "die_mem_stack_8",
7575 [MCPE_DIE_MEM_STACK_12] = "die_mem_stack_12",
7576 [MCPE_DIE_MEM_STACK_16] = "die_mem_stack_16",
7577 [MCPE_DIE_MEM_STACK_32] = "die_mem_stack_32",
7578 [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
7579 [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
7580 [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
7581 [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
7582 [MCPE_NEW_MEM_STACK] = "new_mem_stack",
7583 [MCPE_DIE_MEM_STACK] = "die_mem_stack",
7584 [MCPE_MAKE_STACK_UNINIT_W_O] = "MAKE_STACK_UNINIT_w_o",
7585 [MCPE_MAKE_STACK_UNINIT_NO_O] = "MAKE_STACK_UNINIT_no_o",
7586 [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
7587 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
7588 = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
7589 [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
7590 = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
7591 [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
7592 = "MAKE_STACK_UNINIT_128_no_o_slowcase",
7595 static void init_prof_mem ( void )
7597 Int i, name_count = 0;
7599 for (i = 0; i < MCPE_LAST; i++) {
7600 MC_(event_ctr)[i] = 0;
7601 if (MC_(event_ctr_name)[i] != NULL)
7602 ++name_count;
7605 /* Make sure every profiling event has a name */
7606 tl_assert(name_count == MCPE_LAST);
7609 static void done_prof_mem ( void )
7611 Int i, n;
7612 Bool spaced = False;
7613 for (i = n = 0; i < MCPE_LAST; i++) {
7614 if (!spaced && (n % 10) == 0) {
7615 VG_(printf)("\n");
7616 spaced = True;
7618 if (MC_(event_ctr)[i] > 0) {
7619 spaced = False;
7620 ++n;
7621 VG_(printf)( "prof mem event %3d: %11llu %s\n",
7622 i, MC_(event_ctr)[i],
7623 MC_(event_ctr_name)[i]);
7628 #else
7630 static void init_prof_mem ( void ) { }
7631 static void done_prof_mem ( void ) { }
7633 #endif
7636 /*------------------------------------------------------------*/
7637 /*--- Origin tracking stuff ---*/
7638 /*------------------------------------------------------------*/
7640 /*--------------------------------------------*/
7641 /*--- Origin tracking: load handlers ---*/
7642 /*--------------------------------------------*/
7644 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
7645 return or1 > or2 ? or1 : or2;
7648 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
7649 OCacheLine* line;
7650 UChar descr;
7651 UWord lineoff = oc_line_offset(a);
7652 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7654 if (OC_ENABLE_ASSERTIONS) {
7655 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7658 line = find_OCacheLine( a );
7660 descr = line->u.main.descr[lineoff];
7661 if (OC_ENABLE_ASSERTIONS) {
7662 tl_assert(descr < 0x10);
7665 if (LIKELY(0 == (descr & (1 << byteoff)))) {
7666 return 0;
7667 } else {
7668 return line->u.main.w32[lineoff];
7672 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
7673 OCacheLine* line;
7674 UChar descr;
7675 UWord lineoff, byteoff;
7677 if (UNLIKELY(a & 1)) {
7678 /* Handle misaligned case, slowly. */
7679 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
7680 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
7681 return merge_origins(oLo, oHi);
7684 lineoff = oc_line_offset(a);
7685 byteoff = a & 3; /* 0 or 2 */
7687 if (OC_ENABLE_ASSERTIONS) {
7688 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7690 line = find_OCacheLine( a );
7692 descr = line->u.main.descr[lineoff];
7693 if (OC_ENABLE_ASSERTIONS) {
7694 tl_assert(descr < 0x10);
7697 if (LIKELY(0 == (descr & (3 << byteoff)))) {
7698 return 0;
7699 } else {
7700 return line->u.main.w32[lineoff];
7704 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
7705 OCacheLine* line;
7706 UChar descr;
7707 UWord lineoff;
7709 if (UNLIKELY(a & 3)) {
7710 /* Handle misaligned case, slowly. */
7711 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
7712 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
7713 return merge_origins(oLo, oHi);
7716 lineoff = oc_line_offset(a);
7717 if (OC_ENABLE_ASSERTIONS) {
7718 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7721 line = find_OCacheLine( a );
7723 descr = line->u.main.descr[lineoff];
7724 if (OC_ENABLE_ASSERTIONS) {
7725 tl_assert(descr < 0x10);
7728 if (LIKELY(0 == descr)) {
7729 return 0;
7730 } else {
7731 return line->u.main.w32[lineoff];
7735 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
7736 OCacheLine* line;
7737 UChar descrLo, descrHi, descr;
7738 UWord lineoff;
7740 if (UNLIKELY(a & 7)) {
7741 /* Handle misaligned case, slowly. */
7742 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
7743 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
7744 return merge_origins(oLo, oHi);
7747 lineoff = oc_line_offset(a);
7748 if (OC_ENABLE_ASSERTIONS) {
7749 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7752 line = find_OCacheLine( a );
7754 descrLo = line->u.main.descr[lineoff + 0];
7755 descrHi = line->u.main.descr[lineoff + 1];
7756 descr = descrLo | descrHi;
7757 if (OC_ENABLE_ASSERTIONS) {
7758 tl_assert(descr < 0x10);
7761 if (LIKELY(0 == descr)) {
7762 return 0; /* both 32-bit chunks are defined */
7763 } else {
7764 UInt oLo = descrLo == 0 ? 0 : line->u.main.w32[lineoff + 0];
7765 UInt oHi = descrHi == 0 ? 0 : line->u.main.w32[lineoff + 1];
7766 return merge_origins(oLo, oHi);
7770 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
7771 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 );
7772 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 );
7773 UInt oBoth = merge_origins(oLo, oHi);
7774 return (UWord)oBoth;
7777 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
7778 UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 );
7779 UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 );
7780 UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 );
7781 UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 );
7782 UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
7783 merge_origins(oQ2, oQ3));
7784 return (UWord)oAll;
7788 /*--------------------------------------------*/
7789 /*--- Origin tracking: store handlers ---*/
7790 /*--------------------------------------------*/
7792 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
7793 OCacheLine* line;
7794 UWord lineoff = oc_line_offset(a);
7795 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7797 if (OC_ENABLE_ASSERTIONS) {
7798 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7801 line = find_OCacheLine( a );
7803 #if OC_PRECISION_STORE
7804 if (LIKELY(d32 == 0)) {
7805 // The byte is defined. Just mark it as so in the descr and leave the w32
7806 // unchanged. This may make the descr become zero, so the line no longer
7807 // contains useful info, but that's OK. No loss of information.
7808 line->u.main.descr[lineoff] &= ~(1 << byteoff);
7809 } else if (d32 == line->u.main.w32[lineoff]) {
7810 // At least one of the four bytes in the w32 is undefined with the same
7811 // origin. Just extend the mask. No loss of information.
7812 line->u.main.descr[lineoff] |= (1 << byteoff);
7813 } else {
7814 // Here, we have a conflict: at least one byte in the group is undefined
7815 // but with some other origin. We can't represent both origins, so we
7816 // forget about the previous origin and install this one instead.
7817 line->u.main.descr[lineoff] = (1 << byteoff);
7818 line->u.main.w32[lineoff] = d32;
7820 #else
7821 if (d32 == 0) {
7822 line->u.main.descr[lineoff] &= ~(1 << byteoff);
7823 } else {
7824 line->u.main.descr[lineoff] |= (1 << byteoff);
7825 line->u.main.w32[lineoff] = d32;
7827 #endif
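   /* Worked example (illustrative only): storing a non-zero origin d32 for
      the byte at byteoff 2 sets bit (1 << 2) = 0x4 in descr[lineoff].  If
      the line already holds the same d32 for byteoff 0 (descr 0x1), the
      mask simply grows to 0x5; a different d32 instead resets descr to 0x4
      and overwrites w32[lineoff], forgetting the earlier origin. */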
7830 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
7831 OCacheLine* line;
7832 UWord lineoff, byteoff;
7834 if (UNLIKELY(a & 1)) {
7835 /* Handle misaligned case, slowly. */
7836 MC_(helperc_b_store1)( a + 0, d32 );
7837 MC_(helperc_b_store1)( a + 1, d32 );
7838 return;
7841 lineoff = oc_line_offset(a);
7842 byteoff = a & 3; /* 0 or 2 */
7844 if (OC_ENABLE_ASSERTIONS) {
7845 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7848 line = find_OCacheLine( a );
7850 #if OC_PRECISION_STORE
7851 // Same logic as in the store1 case above.
7852 if (LIKELY(d32 == 0)) {
7853 line->u.main.descr[lineoff] &= ~(3 << byteoff);
7854 } else if (d32 == line->u.main.w32[lineoff]) {
7855 line->u.main.descr[lineoff] |= (3 << byteoff);
7856 line->u.main.w32[lineoff] = d32;
7857 } else {
7858 line->u.main.descr[lineoff] = (3 << byteoff);
7859 line->u.main.w32[lineoff] = d32;
7861 #else
7862 if (d32 == 0) {
7863 line->u.main.descr[lineoff] &= ~(3 << byteoff);
7864 } else {
7865 line->u.main.descr[lineoff] |= (3 << byteoff);
7866 line->u.main.w32[lineoff] = d32;
7868 #endif
7871 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
7872 OCacheLine* line;
7873 UWord lineoff;
7875 if (UNLIKELY(a & 3)) {
7876 /* Handle misaligned case, slowly. */
7877 MC_(helperc_b_store2)( a + 0, d32 );
7878 MC_(helperc_b_store2)( a + 2, d32 );
7879 return;
7882 lineoff = oc_line_offset(a);
7883 if (OC_ENABLE_ASSERTIONS) {
7884 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7887 line = find_OCacheLine( a );
7889 if (d32 == 0) {
7890 line->u.main.descr[lineoff] = 0;
7891 } else {
7892 line->u.main.descr[lineoff] = 0xF;
7893 line->u.main.w32[lineoff] = d32;
7897 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
7898 STATIC_ASSERT(OC_W32S_PER_LINE == 8);
7899 OCacheLine* line;
7900 UWord lineoff;
7902 if (UNLIKELY(a & 7)) {
7903 /* Handle misaligned case, slowly. */
7904 MC_(helperc_b_store4)( a + 0, d32 );
7905 MC_(helperc_b_store4)( a + 4, d32 );
7906 return;
7909 lineoff = oc_line_offset(a);
7910 if (OC_ENABLE_ASSERTIONS) {
7911 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7914 line = find_OCacheLine( a );
7916 if (d32 == 0) {
7917 line->u.main.descr[lineoff + 0] = 0;
7918 line->u.main.descr[lineoff + 1] = 0;
7919 } else {
7920 line->u.main.descr[lineoff + 0] = 0xF;
7921 line->u.main.descr[lineoff + 1] = 0xF;
7922 line->u.main.w32[lineoff + 0] = d32;
7923 line->u.main.w32[lineoff + 1] = d32;
7927 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
7928 STATIC_ASSERT(OC_W32S_PER_LINE == 8);
7929 OCacheLine* line;
7930 UWord lineoff;
7932 if (UNLIKELY(a & 15)) {
7933 /* Handle misaligned case, slowly. */
7934 MC_(helperc_b_store8)( a + 0, d32 );
7935 MC_(helperc_b_store8)( a + 8, d32 );
7936 return;
7939 lineoff = oc_line_offset(a);
7940 if (OC_ENABLE_ASSERTIONS) {
7941 tl_assert(lineoff == (lineoff & 4)); /*0,4*//*since 16-aligned*/
7944 line = find_OCacheLine( a );
7946 if (d32 == 0) {
7947 line->u.main.descr[lineoff + 0] = 0;
7948 line->u.main.descr[lineoff + 1] = 0;
7949 line->u.main.descr[lineoff + 2] = 0;
7950 line->u.main.descr[lineoff + 3] = 0;
7951 } else {
7952 line->u.main.descr[lineoff + 0] = 0xF;
7953 line->u.main.descr[lineoff + 1] = 0xF;
7954 line->u.main.descr[lineoff + 2] = 0xF;
7955 line->u.main.descr[lineoff + 3] = 0xF;
7956 line->u.main.w32[lineoff + 0] = d32;
7957 line->u.main.w32[lineoff + 1] = d32;
7958 line->u.main.w32[lineoff + 2] = d32;
7959 line->u.main.w32[lineoff + 3] = d32;
7963 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
7964 STATIC_ASSERT(OC_W32S_PER_LINE == 8);
7965 OCacheLine* line;
7966 UWord lineoff;
7968 if (UNLIKELY(a & 31)) {
7969 /* Handle misaligned case, slowly. */
7970 MC_(helperc_b_store16)( a + 0, d32 );
7971 MC_(helperc_b_store16)( a + 16, d32 );
7972 return;
7975 lineoff = oc_line_offset(a);
7976 if (OC_ENABLE_ASSERTIONS) {
7977 tl_assert(lineoff == 0);
7980 line = find_OCacheLine( a );
7982 if (d32 == 0) {
7983 line->u.main.descr[0] = 0;
7984 line->u.main.descr[1] = 0;
7985 line->u.main.descr[2] = 0;
7986 line->u.main.descr[3] = 0;
7987 line->u.main.descr[4] = 0;
7988 line->u.main.descr[5] = 0;
7989 line->u.main.descr[6] = 0;
7990 line->u.main.descr[7] = 0;
7991 } else {
7992 line->u.main.descr[0] = 0xF;
7993 line->u.main.descr[1] = 0xF;
7994 line->u.main.descr[2] = 0xF;
7995 line->u.main.descr[3] = 0xF;
7996 line->u.main.descr[4] = 0xF;
7997 line->u.main.descr[5] = 0xF;
7998 line->u.main.descr[6] = 0xF;
7999 line->u.main.descr[7] = 0xF;
8000 line->u.main.w32[0] = d32;
8001 line->u.main.w32[1] = d32;
8002 line->u.main.w32[2] = d32;
8003 line->u.main.w32[3] = d32;
8004 line->u.main.w32[4] = d32;
8005 line->u.main.w32[5] = d32;
8006 line->u.main.w32[6] = d32;
8007 line->u.main.w32[7] = d32;
8012 /*--------------------------------------------*/
8013 /*--- Origin tracking: sarp handlers ---*/
8014 /*--------------------------------------------*/
8016 // We may get asked to do very large SARPs (bug 446103), hence it is important
8017 // to process 32-byte chunks at a time when possible.
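// Worked example (illustrative only): for a = 0x1003 and len = 70 the
// alignment ramp below issues store1 @0x1003, store4 @0x1004, store8
// @0x1008, store16 @0x1010, one store32 @0x1020, then store8 @0x1040 and
// store1 @0x1048, which accounts for all 70 bytes.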
8019 __attribute__((noinline))
8020 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
8021 if ((a & 1) && len >= 1) {
8022 MC_(helperc_b_store1)( a, otag );
8023 a++;
8024 len--;
8026 if ((a & 2) && len >= 2) {
8027 MC_(helperc_b_store2)( a, otag );
8028 a += 2;
8029 len -= 2;
8031 if ((a & 4) && len >= 4) {
8032 MC_(helperc_b_store4)( a, otag );
8033 a += 4;
8034 len -= 4;
8036 if ((a & 8) && len >= 8) {
8037 MC_(helperc_b_store8)( a, otag );
8038 a += 8;
8039 len -= 8;
8041 if ((a & 16) && len >= 16) {
8042 MC_(helperc_b_store16)( a, otag );
8043 a += 16;
8044 len -= 16;
8046 if (len >= 32) {
8047 tl_assert(0 == (a & 31));
8048 while (len >= 32) {
8049 MC_(helperc_b_store32)( a, otag );
8050 a += 32;
8051 len -= 32;
8054 if (len >= 16) {
8055 MC_(helperc_b_store16)( a, otag );
8056 a += 16;
8057 len -= 16;
8059 if (len >= 8) {
8060 MC_(helperc_b_store8)( a, otag );
8061 a += 8;
8062 len -= 8;
8064 if (len >= 4) {
8065 MC_(helperc_b_store4)( a, otag );
8066 a += 4;
8067 len -= 4;
8069 if (len >= 2) {
8070 MC_(helperc_b_store2)( a, otag );
8071 a += 2;
8072 len -= 2;
8074 if (len >= 1) {
8075 MC_(helperc_b_store1)( a, otag );
8076 //a++;
8077 len--;
8079 tl_assert(len == 0);
8082 __attribute__((noinline))
8083 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
8084 if ((a & 1) && len >= 1) {
8085 MC_(helperc_b_store1)( a, 0 );
8086 a++;
8087 len--;
8089 if ((a & 2) && len >= 2) {
8090 MC_(helperc_b_store2)( a, 0 );
8091 a += 2;
8092 len -= 2;
8094 if ((a & 4) && len >= 4) {
8095 MC_(helperc_b_store4)( a, 0 );
8096 a += 4;
8097 len -= 4;
8099 if ((a & 8) && len >= 8) {
8100 MC_(helperc_b_store8)( a, 0 );
8101 a += 8;
8102 len -= 8;
8104 if ((a & 16) && len >= 16) {
8105 MC_(helperc_b_store16)( a, 0 );
8106 a += 16;
8107 len -= 16;
8109 if (len >= 32) {
8110 tl_assert(0 == (a & 31));
8111 while (len >= 32) {
8112 MC_(helperc_b_store32)( a, 0 );
8113 a += 32;
8114 len -= 32;
8117 if (len >= 16) {
8118 MC_(helperc_b_store16)( a, 0 );
8119 a += 16;
8120 len -= 16;
8122 if (len >= 8) {
8123 MC_(helperc_b_store8)( a, 0 );
8124 a += 8;
8125 len -= 8;
8127 if (len >= 4) {
8128 MC_(helperc_b_store4)( a, 0 );
8129 a += 4;
8130 len -= 4;
8132 if (len >= 2) {
8133 MC_(helperc_b_store2)( a, 0 );
8134 a += 2;
8135 len -= 2;
8137 if (len >= 1) {
8138 MC_(helperc_b_store1)( a, 0 );
8139 //a++;
8140 len--;
8142 tl_assert(len == 0);
8146 /*------------------------------------------------------------*/
8147 /*--- Setup and finalisation ---*/
8148 /*------------------------------------------------------------*/
8150 static void mc_post_clo_init ( void )
8152 /* If we've been asked to emit XML, mash around various other
8153 options so as to constrain the output somewhat. */
8154 if (VG_(clo_xml)) {
8155 /* Extract as much info as possible from the leak checker. */
8156 MC_(clo_leak_check) = LC_Full;
8159 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)
8160 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8161 VG_(message)(Vg_UserMsg,
8162 "Warning: --freelist-big-blocks value %lld has no effect\n"
8163 "as it is >= to --freelist-vol value %lld\n",
8164 MC_(clo_freelist_big_blocks),
8165 MC_(clo_freelist_vol));
8168 if (MC_(clo_workaround_gcc296_bugs)
8169 && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8170 VG_(umsg)(
8171 "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
8172 "Warning: Instead use: --ignore-range-below-sp=1024-1\n"
8173 "\n"
8177 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
8179 if (MC_(clo_mc_level) == 3) {
8180 /* We're doing origin tracking. */
8181 # ifdef PERF_FAST_STACK
8182 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU );
8183 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU );
8184 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU );
8185 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU );
8186 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU );
8187 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
8188 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
8189 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
8190 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
8191 # endif
8192 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU );
8193 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_make_ECU );
8194 } else {
8195 /* Not doing origin tracking */
8196 # ifdef PERF_FAST_STACK
8197 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
8198 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 );
8199 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 );
8200 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 );
8201 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 );
8202 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
8203 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
8204 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
8205 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
8206 # endif
8207 VG_(track_new_mem_stack) ( mc_new_mem_stack );
8208 VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
8211 // We assume that brk()/sbrk() does not initialise new memory. Is this
8212 // accurate? John Reiser says:
8214 // 0) sbrk() can *decrease* process address space. No zero fill is done
8215 // for a decrease, not even the fragment on the high end of the last page
8216 // that is beyond the new highest address. For maximum safety and
8217 // portability, then the bytes in the last page that reside above [the
8218 // new] sbrk(0) should be considered to be uninitialized, but in practice
8219 // it is exceedingly likely that they will retain their previous
8220 // contents.
8222 // 1) If an increase is large enough to require new whole pages, then
8223 // those new whole pages (like all new pages) are zero-filled by the
8224 // operating system. So if sbrk(0) already is page aligned, then
8225 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
8227 // 2) Any increase that lies within an existing allocated page is not
8228 // changed. So if (x = sbrk(0)) is not page aligned, then
8229 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
8230 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
8231 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
8232 // of them come along for the ride because the operating system deals
8233 // only in whole pages. Again, for maximum safety and portability, then
8234 // anything that lives above [the new] sbrk(0) should be considered
8235 // uninitialized, but in practice will retain previous contents [zero in
8236 // this case.]"
8238 // In short:
8240 // A key property of sbrk/brk is that new whole pages that are supplied
8241 // by the operating system *do* get initialized to zero.
8243 // As for the portability of all this:
8245    // sbrk and brk are not POSIX. However, any system that is a derivative
8246    // of *nix has sbrk and brk, because too much software (such as the
8247    // Bourne shell) relies on the traditional memory map (.text,
8248    // .data+.bss, stack) and the existence of sbrk/brk.
8250 // So we should arguably observe all this. However:
8251 // - The current inaccuracy has caused maybe one complaint in seven years(?)
8252 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
8253 // doubt most programmers know the above information.
8254 // So I'm not terribly unhappy with marking it as undefined. --njn.
8256 // [More: I think most of what John said only applies to sbrk(). It seems
8257 // that brk() always deals in whole pages. And since this event deals
8258 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
8259 // just mark all memory it allocates as defined.]
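   // Editor's worked example (not in the original): with PAGE_SIZE == 4096
   // and an unaligned break x == sbrk(0) == 0x1234 (page offset 0x234), a
   // call to sbrk(PAGE_SIZE) leaves ((PAGE_SIZE-1) & -x) == 0xDCC bytes (up
   // to the next page boundary) with their old contents, and the kernel
   // supplies one fresh zero-filled page; of those 4096 zeroed bytes,
   // ((PAGE_SIZE-1) & x) == 0x234 lie inside the requested increase and the
   // remaining 0xDCC bytes "come along for the ride".  Note that
   // 0xDCC + 0x234 == 0x1000 == PAGE_SIZE.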
8261 # if !defined(VGO_solaris)
8262 if (MC_(clo_mc_level) == 3)
8263 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_make_ECU );
8264 else
8265 VG_(track_new_mem_brk) ( mc_new_mem_w_tid_no_ECU );
8266 # else
8267 // On Solaris, brk memory has to be marked as defined, otherwise we get
8268 // many false positives.
8269 VG_(track_new_mem_brk) ( make_mem_defined_w_tid );
8270 # endif
8272 /* This origin tracking cache is huge (~100M), so only initialise
8273 if we need it. */
8274 if (MC_(clo_mc_level) >= 3) {
8275 init_OCache();
8276 tl_assert(ocacheL1 != NULL);
8277 for (UInt i = 0; i < 4096; i++ ) {
8278 tl_assert(ocachesL2[i] != NULL);
8280 } else {
8281 tl_assert(ocacheL1 == NULL);
8282 for (UInt i = 0; i < 4096; i++ ) {
8283 tl_assert(ocachesL2[i] == NULL);
8287 MC_(chunk_poolalloc) = VG_(newPA)
8288 (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
8289 1000,
8290 VG_(malloc),
8291 "mc.cMC.1 (MC_Chunk pools)",
8292 VG_(free));
8294 /* Do not check definedness of guest state if --undef-value-errors=no */
8295 if (MC_(clo_mc_level) >= 2)
8296 VG_(track_pre_reg_read) ( mc_pre_reg_read );
8298 if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
8299 if (MC_(clo_keep_stacktraces) == KS_none
8300 || MC_(clo_keep_stacktraces) == KS_free)
8301 VG_(fmsg_bad_option)("--keep-stacktraces",
8302 "To use --xtree-memory=full, you must"
8303 " keep at least the alloc stacktrace\n");
8304 // Activate full xtree memory profiling.
8305 VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main));
8310 static void print_SM_info(const HChar* type, Int n_SMs)
8312 VG_(message)(Vg_DebugMsg,
8313 " memcheck: SMs: %s = %d (%luk, %luM)\n",
8314 type,
8315 n_SMs,
8316 n_SMs * sizeof(SecMap) / 1024UL,
8317 n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
8320 static void mc_print_stats (void)
8322 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
8324 VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
8325 VG_(free_queue_volume), VG_(free_queue_length));
8326 VG_(message)(Vg_DebugMsg,
8327 " memcheck: sanity checks: %d cheap, %d expensive\n",
8328 n_sanity_cheap, n_sanity_expensive );
8329 VG_(message)(Vg_DebugMsg,
8330 " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
8331 n_auxmap_L2_nodes,
8332 n_auxmap_L2_nodes * 64,
8333 n_auxmap_L2_nodes / 16 );
8334 VG_(message)(Vg_DebugMsg,
8335 " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
8336 n_auxmap_L1_searches, n_auxmap_L1_cmps,
8337 (10ULL * n_auxmap_L1_cmps)
8338 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
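   // Editor's note: the ratio is printed in fixed-point tenths, e.g. 25 cmps
   // over 10 searches is reported as "ratio 25:10", i.e. 2.5 comparisons per
   // search on average; the ternary guards against division by zero when no
   // searches occurred.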
8340 VG_(message)(Vg_DebugMsg,
8341 " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
8342 n_auxmap_L2_searches, n_auxmap_L2_nodes
8345 print_SM_info("n_issued ", n_issued_SMs);
8346 print_SM_info("n_deissued ", n_deissued_SMs);
8347 print_SM_info("max_noaccess ", max_noaccess_SMs);
8348 print_SM_info("max_undefined", max_undefined_SMs);
8349 print_SM_info("max_defined ", max_defined_SMs);
8350 print_SM_info("max_non_DSM ", max_non_DSM_SMs);
8352 // Three DSMs, plus the non-DSM ones
8353 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
8354    // The 3*sizeof(Word) term is the AVL node metadata size.
8355    // The VG_ROUNDUP is needed because the OSet pool allocator must (and
8356    // does) align the elements to pointer size.
8357    // Note that the pool allocator has some additional small overhead
8358    // which is not counted below.
8359 // Hardwiring this logic sucks, but I don't see how else to do it.
8360 max_secVBit_szB = max_secVBit_nodes *
8361 (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
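   // Editor's note (illustrative, assuming a 64-bit target): the per-node
   // cost is 3*sizeof(Word) == 24 bytes of AVL metadata plus
   // sizeof(SecVBitNode) rounded up to a multiple of sizeof(void*) == 8.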
8362 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
8364 VG_(message)(Vg_DebugMsg,
8365 " memcheck: max sec V bit nodes: %d (%luk, %luM)\n",
8366 max_secVBit_nodes, max_secVBit_szB / 1024,
8367 max_secVBit_szB / (1024 * 1024));
8368 VG_(message)(Vg_DebugMsg,
8369 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
8370 sec_vbits_new_nodes + sec_vbits_updates,
8371 sec_vbits_new_nodes, sec_vbits_updates );
8372 VG_(message)(Vg_DebugMsg,
8373 " memcheck: max shadow mem size: %luk, %luM\n",
8374 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
8376 if (MC_(clo_mc_level) >= 3) {
8377 VG_(message)(Vg_DebugMsg,
8378 " ocacheL1: %'14lu refs %'14lu misses (%'lu lossage)\n",
8379 stats_ocacheL1_find,
8380 stats_ocacheL1_misses,
8381 stats_ocacheL1_lossage );
8382 VG_(message)(Vg_DebugMsg,
8383 " ocacheL1: %'14lu at 0 %'14lu at 1\n",
8384 stats_ocacheL1_find - stats_ocacheL1_misses
8385 - stats_ocacheL1_found_at_1
8386 - stats_ocacheL1_found_at_N,
8387 stats_ocacheL1_found_at_1 );
8388 VG_(message)(Vg_DebugMsg,
8389 " ocacheL1: %'14lu at 2+ %'14lu move-fwds\n",
8390 stats_ocacheL1_found_at_N,
8391 stats_ocacheL1_movefwds );
8392 VG_(message)(Vg_DebugMsg,
8393 " ocacheL1: %'14lu sizeB %'14d useful\n",
8394 (SizeT)sizeof(OCache),
8395 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
8396 VG_(message)(Vg_DebugMsg,
8397 " ocacheL2: %'14lu finds %'14lu misses\n",
8398 stats__ocacheL2_finds,
8399 stats__ocacheL2_misses );
8400 VG_(message)(Vg_DebugMsg,
8401 " ocacheL2: %'14lu adds %'14lu dels\n",
8402 stats__ocacheL2_adds,
8403 stats__ocacheL2_dels );
8404 VG_(message)(Vg_DebugMsg,
8405 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
8406 stats__ocacheL2_n_nodes_max,
8407 stats__ocacheL2_n_nodes );
8408 VG_(message)(Vg_DebugMsg,
8409 " niacache: %'12lu refs %'12lu misses\n",
8410 stats__nia_cache_queries, stats__nia_cache_misses);
8411 } else {
8412 tl_assert(ocacheL1 == NULL);
8413 for (UInt i = 0; i < 4096; i++ ) {
8414          tl_assert(ocachesL2[i] == NULL);
8420 static void mc_fini ( Int exitcode )
8422 MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True);
8423 MC_(print_malloc_stats)();
8425 if (MC_(clo_leak_check) != LC_Off) {
8426 LeakCheckParams lcp;
8427 HChar* xt_filename = NULL;
8428 lcp.mode = MC_(clo_leak_check);
8429 lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
8430 lcp.heuristics = MC_(clo_leak_check_heuristics);
8431 lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
8432 lcp.deltamode = LCD_Any;
8433 lcp.max_loss_records_output = 999999999;
8434 lcp.requested_by_monitor_command = False;
8435 if (MC_(clo_xtree_leak)) {
8436 xt_filename = VG_(expand_file_name)("--xtree-leak-file",
8437 MC_(clo_xtree_leak_file));
8438 lcp.xt_filename = xt_filename;
8439 lcp.mode = LC_Full;
8440 lcp.show_leak_kinds = MC_(all_Reachedness)();
8442 else
8443 lcp.xt_filename = NULL;
8444 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
8445 if (MC_(clo_xtree_leak))
8446 VG_(free)(xt_filename);
8447 } else {
8448 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8449 VG_(umsg)(
8450 "For a detailed leak analysis, rerun with: --leak-check=full\n"
8451 "\n"
8456 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
8457 && MC_(clo_mc_level) == 2) {
8458 VG_(message)(Vg_UserMsg,
8459 "Use --track-origins=yes to see where "
8460 "uninitialised values come from\n");
8463 /* Print a warning if any client-request generated ignore-ranges
8464 still exist. It would be reasonable to expect that a properly
8465 written program would remove any such ranges before exiting, and
8466       since they are a bit on the dangerous side, we comment on them. By
8467 contrast ranges which are specified on the command line normally
8468 pertain to hardware mapped into the address space, and so we
8469 can't expect the client to have got rid of them. */
8470 if (gIgnoredAddressRanges) {
8471 UInt i, nBad = 0;
8472 for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
8473 UWord val = IAR_INVALID;
8474 UWord key_min = ~(UWord)0;
8475 UWord key_max = (UWord)0;
8476 VG_(indexRangeMap)( &key_min, &key_max, &val,
8477 gIgnoredAddressRanges, i );
8478 if (val != IAR_ClientReq)
8479 continue;
8480 /* Print the offending range. Also, if it is the first,
8481 print a banner before it. */
8482 nBad++;
8483 if (nBad == 1) {
8484 VG_(umsg)(
8485 "WARNING: exiting program has the following client-requested\n"
8486 "WARNING: address error disablement range(s) still in force,\n"
8487 "WARNING: "
8488 "possibly as a result of some mistake in the use of the\n"
8489 "WARNING: "
8490 "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
8493 VG_(umsg)(" [%u] 0x%016lx-0x%016lx %s\n",
8494 i, key_min, key_max, showIARKind(val));
8498 done_prof_mem();
8500 if (VG_(clo_stats))
8501 mc_print_stats();
8503 if (0) {
8504 VG_(message)(Vg_DebugMsg,
8505 "------ Valgrind's client block stats follow ---------------\n" );
8506 show_client_block_stats();
8510 /* Mark the given addr/len unaddressable for the watchpoint implementation.
8511    The PointKind will be handled at access time. */
8512 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
8513 Addr addr, SizeT len)
8515    /* GDBTD this is somewhat fishy. We should probably save the previous
8516       accessibility and definedness in gdbserver so that they can be restored
8517 properly. Currently, we assume that the user only watches things
8518 which are properly addressable and defined */
8519 if (insert)
8520 MC_(make_mem_noaccess) (addr, len);
8521 else
8522 MC_(make_mem_defined) (addr, len);
8523 return True;
8526 static void mc_pre_clo_init(void)
8528 VG_(details_name) ("Memcheck");
8529 VG_(details_version) (NULL);
8530 VG_(details_description) ("a memory error detector");
8531 VG_(details_copyright_author)(
8532 "Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al.");
8533 VG_(details_bug_reports_to) (VG_BUGS_TO);
8534 VG_(details_avg_translation_sizeB) ( 640 );
8536 VG_(basic_tool_funcs) (mc_post_clo_init,
8537 MC_(instrument),
8538 mc_fini);
8540 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) );
8543 VG_(needs_core_errors) ();
8544 VG_(needs_tool_errors) (MC_(eq_Error),
8545 MC_(before_pp_Error),
8546 MC_(pp_Error),
8547 True,/*show TIDs for errors*/
8548 MC_(update_Error_extra),
8549 MC_(is_recognised_suppression),
8550 MC_(read_extra_suppression_info),
8551 MC_(error_matches_suppression),
8552 MC_(get_error_name),
8553 MC_(get_extra_suppression_info),
8554 MC_(print_extra_suppression_use),
8555 MC_(update_extra_suppression_use));
8556 VG_(needs_libc_freeres) ();
8557 VG_(needs_cxx_freeres) ();
8558 VG_(needs_command_line_options)(mc_process_cmd_line_options,
8559 mc_print_usage,
8560 mc_print_debug_usage);
8561 VG_(needs_client_requests) (mc_handle_client_request);
8562 VG_(needs_sanity_checks) (mc_cheap_sanity_check,
8563 mc_expensive_sanity_check);
8564 VG_(needs_print_stats) (mc_print_stats);
8565 VG_(needs_info_location) (MC_(pp_describe_addr));
8566 VG_(needs_malloc_replacement) (MC_(malloc),
8567 MC_(__builtin_new),
8568 MC_(__builtin_new_aligned),
8569 MC_(__builtin_vec_new),
8570 MC_(__builtin_vec_new_aligned),
8571 MC_(memalign),
8572 MC_(calloc),
8573 MC_(free),
8574 MC_(__builtin_delete),
8575 MC_(__builtin_delete_aligned),
8576 MC_(__builtin_vec_delete),
8577 MC_(__builtin_vec_delete_aligned),
8578 MC_(realloc),
8579 MC_(malloc_usable_size),
8580 MC_MALLOC_DEFAULT_REDZONE_SZB );
8581 MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
8583 VG_(needs_xml_output) ();
8585 VG_(track_new_mem_startup) ( mc_new_mem_startup );
8587 // Handling of mmap and mprotect isn't simple (well, it is simple,
8588 // but the justification isn't.) See comments above, just prior to
8589 // mc_new_mem_mmap.
8590 VG_(track_new_mem_mmap) ( mc_new_mem_mmap );
8591 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
8593 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) );
8595 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
8596 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) );
8597 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) );
8599 /* Defer the specification of the new_mem_stack functions to the
8600 post_clo_init function, since we need to first parse the command
8601 line before deciding which set to use. */
8603 # ifdef PERF_FAST_STACK
8604 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
8605 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 );
8606 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 );
8607 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 );
8608 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 );
8609 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 );
8610 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 );
8611 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 );
8612 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 );
8613 # endif
8614 VG_(track_die_mem_stack) ( mc_die_mem_stack );
8616 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) );
8618 VG_(track_pre_mem_read) ( check_mem_is_defined );
8619 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
8620 VG_(track_pre_mem_write) ( check_mem_is_addressable );
8621 VG_(track_post_mem_write) ( mc_post_mem_write );
8623 VG_(track_post_reg_write) ( mc_post_reg_write );
8624 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
8626 if (MC_(clo_mc_level) >= 2) {
8627 VG_(track_copy_mem_to_reg) ( mc_copy_mem_to_reg );
8628 VG_(track_copy_reg_to_mem) ( mc_copy_reg_to_mem );
8631 VG_(needs_watchpoint) ( mc_mark_unaddressable_for_watchpoint );
8633 init_shadow_memory();
8634 // MC_(chunk_poolalloc) must be allocated in post_clo_init
8635 tl_assert(MC_(chunk_poolalloc) == NULL);
8636 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" );
8637 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
8638 init_prof_mem();
8640 tl_assert( mc_expensive_sanity_check() );
8642 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
8643 tl_assert(sizeof(UWord) == sizeof(Addr));
8644 // Call me paranoid. I don't care.
8645 tl_assert(sizeof(void*) == sizeof(Addr));
8647 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
8648 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
8650 /* This is small. Always initialise it. */
8651 init_nia_to_ecu_cache();
8653 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
8654 if we need to, since the command line args haven't been
8655 processed yet. Hence defer it to mc_post_clo_init. */
8656 tl_assert(ocacheL1 == NULL);
8657 for (UInt i = 0; i < 4096; i++ ) {
8658 tl_assert(ocachesL2[i] == NULL);
8661 /* Check some important stuff. See extensive comments above
8662 re UNALIGNED_OR_HIGH for background. */
8663 # if VG_WORDSIZE == 4
8664 tl_assert(sizeof(void*) == 4);
8665 tl_assert(sizeof(Addr) == 4);
8666 tl_assert(sizeof(UWord) == 4);
8667 tl_assert(sizeof(Word) == 4);
8668 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
8669 tl_assert(MASK(1) == 0UL);
8670 tl_assert(MASK(2) == 1UL);
8671 tl_assert(MASK(4) == 3UL);
8672 tl_assert(MASK(8) == 7UL);
8673 # else
8674 tl_assert(VG_WORDSIZE == 8);
8675 tl_assert(sizeof(void*) == 8);
8676 tl_assert(sizeof(Addr) == 8);
8677 tl_assert(sizeof(UWord) == 8);
8678 tl_assert(sizeof(Word) == 8);
8679 tl_assert(MAX_PRIMARY_ADDRESS == 0x1FFFFFFFFFULL);
8680 tl_assert(MASK(1) == 0xFFFFFFE000000000ULL);
8681 tl_assert(MASK(2) == 0xFFFFFFE000000001ULL);
8682 tl_assert(MASK(4) == 0xFFFFFFE000000003ULL);
8683 tl_assert(MASK(8) == 0xFFFFFFE000000007ULL);
8684 # endif
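   /* Editor's note: as the asserted values show, MASK(sz) equals
      (~MAX_PRIMARY_ADDRESS | (sz-1)) on both word sizes, so a single test of
      the form (a & MASK(sz)) is nonzero exactly when 'a' is either misaligned
      for an sz-byte access or lies above MAX_PRIMARY_ADDRESS -- presumably
      the combined check that UNALIGNED_OR_HIGH performs.  E.g. on a 64-bit
      target, (0x2000000000 & MASK(4)) != 0 (too high) and
      (0x1002 & MASK(4)) != 0 (misaligned), while (0x1000 & MASK(4)) == 0. */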
8686 /* Check some assertions to do with the instrumentation machinery. */
8687 MC_(do_instrumentation_startup_checks)();
8690 STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));
8692 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
8694 /*--------------------------------------------------------------------*/
8695 /*--- end mc_main.c ---*/
8696 /*--------------------------------------------------------------------*/