Run power_ISA2_0[57] tests with -q
[valgrind.git] / exp-sgcheck / sg_main.c
blob9b81dceff98ba3d94fba232c0ea2d00428b2110e
2 /*--------------------------------------------------------------------*/
3 /*--- Ptrcheck: a pointer-use checker. ---*/
4 /*--- This file checks stack and global array accesses. ---*/
5 /*--- sg_main.c ---*/
6 /*--------------------------------------------------------------------*/
8 /*
9 This file is part of Ptrcheck, a Valgrind tool for checking pointer
10 use in programs.
12 Copyright (C) 2008-2017 OpenWorks Ltd
13 info@open-works.co.uk
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
30 The GNU General Public License is contained in the file COPYING.
32 Neither the names of the U.S. Department of Energy nor the
33 University of California nor the names of its contributors may be
34 used to endorse or promote products derived from this software
35 without prior written permission.
38 #include "pub_tool_basics.h"
39 #include "pub_tool_libcbase.h"
40 #include "pub_tool_libcassert.h"
41 #include "pub_tool_libcprint.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
44 #include "pub_tool_xarray.h"
45 #include "pub_tool_threadstate.h"
46 #include "pub_tool_mallocfree.h"
47 #include "pub_tool_machine.h"
48 #include "pub_tool_debuginfo.h"
49 #include "pub_tool_options.h"
51 #include "pc_common.h"
53 #include "sg_main.h" // self
56 static
57 void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/
60 //////////////////////////////////////////////////////////////
61 // //
62 // Basic Stuff //
63 // //
64 //////////////////////////////////////////////////////////////
66 static inline Bool is_sane_TId ( ThreadId tid )
68 return tid >= 0 && tid < VG_N_THREADS
69 && tid != VG_INVALID_THREADID;
72 static void* sg_malloc ( const HChar* cc, SizeT n ) {
73 void* p;
74 tl_assert(n > 0);
75 p = VG_(malloc)( cc, n );
76 return p;
79 static void sg_free ( void* p ) {
80 tl_assert(p);
81 VG_(free)(p);
85 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
86 first interval is lower, 1 if the first interval is higher, and 0
87 if there is any overlap. Redundant paranoia with casting is there
88 following what looked distinctly like a bug in gcc-4.1.2, in which
89 some of the comparisons were done signedly instead of
90 unsignedly. */
91 inline
92 static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
93 Addr a2, SizeT n2 ) {
94 UWord a1w = (UWord)a1;
95 UWord n1w = (UWord)n1;
96 UWord a2w = (UWord)a2;
97 UWord n2w = (UWord)n2;
98 tl_assert(n1w > 0 && n2w > 0);
99 if (a1w + n1w <= a2w) return -1L;
100 if (a2w + n2w <= a1w) return 1L;
101 return 0;
104 /* Return true iff [aSmall,aSmall+nSmall) is entirely contained
105 within [aBig,aBig+nBig). */
106 inline
107 static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
108 Addr aSmall, SizeT nSmall ) {
109 tl_assert(nBig > 0 && nSmall > 0);
110 return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
113 inline
114 static Addr Addr__min ( Addr a1, Addr a2 ) {
115 return a1 < a2 ? a1 : a2;
118 inline
119 static Addr Addr__max ( Addr a1, Addr a2 ) {
120 return a1 < a2 ? a2 : a1;
124 //////////////////////////////////////////////////////////////
125 // //
126 // StackBlocks Persistent Cache //
127 // //
128 //////////////////////////////////////////////////////////////
130 /* We maintain a set of XArray* of StackBlocks. These are never
131 freed. When a new StackBlock vector is acquired from
132 VG_(di_get_local_blocks_at_ip), we compare it to the existing set.
133 If not present, it is added. If present, the just-acquired one is
134 freed and the copy used.
136 This simplifies storage management elsewhere. It allows us to
137 assume that a pointer to an XArray* of StackBlock is valid forever.
138 It also means there are no duplicates anywhere, which could be
139 important from a space point of view for programs that generate a
140 lot of translations, or where translations are frequently discarded
141 and re-made.
143 Note that we normalise the arrays by sorting the elements according
144 to an arbitrary total order, so as to avoid the situation that two
145 vectors describe the same set of variables but are not structurally
146 identical. */
148 static inline Bool StackBlock__sane ( const StackBlock* fb )
150 if (fb->name[ sizeof(fb->name)-1 ] != 0)
151 return False;
152 if (fb->spRel != False && fb->spRel != True)
153 return False;
154 if (fb->isVec != False && fb->isVec != True)
155 return False;
156 return True;
159 /* Generate an arbitrary total ordering on StackBlocks. */
160 static Word StackBlock__cmp ( const StackBlock* fb1, const StackBlock* fb2 )
162 Word r;
163 tl_assert(StackBlock__sane(fb1));
164 tl_assert(StackBlock__sane(fb2));
165 /* Hopefully the .base test hits most of the time. For the blocks
166 associated with any particular instruction, if the .base values
167 are the same then probably it doesn't make sense for the other
168 fields to be different. But this is supposed to be a completely
169 general structural total order, so we have to compare everything
170 anyway. */
171 if (fb1->base < fb2->base) return -1;
172 if (fb1->base > fb2->base) return 1;
173 /* compare sizes */
174 if (fb1->szB < fb2->szB) return -1;
175 if (fb1->szB > fb2->szB) return 1;
176 /* compare sp/fp flag */
177 if (fb1->spRel < fb2->spRel) return -1;
178 if (fb1->spRel > fb2->spRel) return 1;
179 /* compare is/is-not array-typed flag */
180 if (fb1->isVec < fb2->isVec) return -1;
181 if (fb1->isVec > fb2->isVec) return 1;
182 /* compare the name */
183 r = (Word)VG_(strcmp)(fb1->name, fb2->name);
184 return r;
187 /* Returns True if all fields except .szB are the same. szBs may or
188 may not be the same; they are simply not consulted. */
189 static Bool StackBlock__all_fields_except_szB_are_equal (
190 StackBlock* fb1,
191 StackBlock* fb2
194 tl_assert(StackBlock__sane(fb1));
195 tl_assert(StackBlock__sane(fb2));
196 return fb1->base == fb2->base
197 && fb1->spRel == fb2->spRel
198 && fb1->isVec == fb2->isVec
199 && 0 == VG_(strcmp)(fb1->name, fb2->name);
203 /* Generate an arbitrary total ordering on vectors of StackBlocks. */
204 static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
206 Word i, r, n1, n2;
207 n1 = VG_(sizeXA)( fb1s );
208 n2 = VG_(sizeXA)( fb2s );
209 if (n1 < n2) return -1;
210 if (n1 > n2) return 1;
211 for (i = 0; i < n1; i++) {
212 StackBlock *fb1, *fb2;
213 fb1 = VG_(indexXA)( fb1s, i );
214 fb2 = VG_(indexXA)( fb2s, i );
215 r = StackBlock__cmp( fb1, fb2 );
216 if (r != 0) return r;
218 tl_assert(i == n1 && i == n2);
219 return 0;
222 static void pp_StackBlocks ( XArray* sbs )
224 Word i, n = VG_(sizeXA)( sbs );
225 VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
226 for (i = 0; i < n; i++) {
227 StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i );
228 VG_(message)(Vg_DebugMsg,
229 " StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
230 sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
231 sb->isVec ? 'Y' : 'N', &sb->name[0]
234 VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
238 /* ---------- The StackBlock vector cache ---------- */
240 static WordFM* /* XArray* of StackBlock -> nothing */
241 frameBlocks_set = NULL;
243 static void init_StackBlocks_set ( void )
245 tl_assert(!frameBlocks_set);
246 frameBlocks_set
247 = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
248 (Word(*)(UWord,UWord))StackBlocks__cmp );
249 tl_assert(frameBlocks_set);
252 /* Find the given StackBlock-vector in our collection thereof. If
253 found, deallocate the supplied one, and return the address of the
254 copy. If not found, add the supplied one to our collection and
255 return its address. */
256 static XArray* /* of StackBlock */
257 StackBlocks__find_and_dealloc__or_add
258 ( XArray* /* of StackBlock */ orig )
260 UWord key, val;
262 /* First, normalise, as per comments above. */
263 VG_(setCmpFnXA)( orig, (XACmpFn_t)StackBlock__cmp );
264 VG_(sortXA)( orig );
266 /* Now get rid of any exact duplicates. */
267 nuke_dups:
268 { Word r, w, nEQ, n = VG_(sizeXA)( orig );
269 if (n >= 2) {
270 w = 0;
271 nEQ = 0;
272 for (r = 0; r < n; r++) {
273 if (r+1 < n) {
274 StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
275 StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
276 Word c = StackBlock__cmp(pR0,pR1);
277 tl_assert(c == -1 || c == 0);
278 if (c == 0) { nEQ++; continue; }
280 if (w != r) {
281 StackBlock* pW = VG_(indexXA)( orig, w );
282 StackBlock* pR = VG_(indexXA)( orig, r );
283 *pW = *pR;
285 w++;
287 tl_assert(r == n);
288 tl_assert(w + nEQ == n);
289 if (w < n) {
290 VG_(dropTailXA)( orig, n-w );
292 if (0) VG_(printf)("delta %ld\n", n-w);
296 /* Deal with the following strangeness, where two otherwise
297 identical blocks are claimed to have different sizes. In which
298 case we use the larger size. */
299 /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
300 StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
301 StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
303 { Word i, n = VG_(sizeXA)( orig );
304 if (n >= 2) {
305 for (i = 0; i < n-1; i++) {
306 StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
307 StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
308 if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
309 /* They can't be identical because the previous tidying
310 pass would have removed the duplicates. And they
311 can't be > because the earlier sorting pass would
312 have ordered otherwise-identical descriptors
313 according to < on .szB fields. Hence: */
314 tl_assert(sb0->szB < sb1->szB);
315 sb0->szB = sb1->szB;
316 /* This makes the blocks identical, at the size of the
317 larger one. Rather than go to all the hassle of
318 sliding the rest down, simply go back to the
319 remove-duplicates stage. The assertion guarantees
320 that we eventually make progress, since the rm-dups
321 stage will get rid of one of the blocks. This is
322 expected to happen only exceedingly rarely. */
323 tl_assert(StackBlock__cmp(sb0,sb1) == 0);
324 goto nuke_dups;
330 /* If there are any blocks which overlap and have the same
331 fpRel-ness, junk the whole descriptor; it's obviously bogus.
332 Icc11 certainly generates bogus info from time to time.
334 This check is pretty weak; really we ought to have a stronger
335 sanity check. */
336 { Word i, n = VG_(sizeXA)( orig );
337 static Int moans = 3;
338 for (i = 0; i < n-1; i++) {
339 StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
340 StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
341 if (sb1->spRel == sb2->spRel
342 && (sb1->base >= sb2->base
343 || sb1->base + sb1->szB > sb2->base)) {
344 if (moans > 0 && !VG_(clo_xml)) {
345 moans--;
346 VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
347 "overlapping stack blocks\n");
348 if (VG_(clo_verbosity) >= 2)
349 pp_StackBlocks(orig);
350 if (moans == 0)
351 VG_(message)(Vg_UserMsg, "Further instances of this "
352 "message will not be shown\n" );
354 VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
355 break;
360 /* Now, do we have it already? */
361 if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
362 /* yes */
363 XArray* res;
364 tl_assert(val == 0);
365 tl_assert(key != (UWord)orig);
366 VG_(deleteXA)(orig);
367 res = (XArray*)key;
368 return res;
369 } else {
370 /* no */
371 VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
372 return orig;
376 /* Top level function for getting the StackBlock vector for a given
377 instruction. It is guaranteed that the returned pointer will be
378 valid for the entire rest of the run, and also that the addresses
379 of the individual elements of the array will not change. */
381 static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
383 XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
384 tl_assert(blocks);
385 return StackBlocks__find_and_dealloc__or_add( blocks );
389 //////////////////////////////////////////////////////////////
390 // //
391 // GlobalBlocks Persistent Cache //
392 // //
393 //////////////////////////////////////////////////////////////
395 /* Generate an arbitrary total ordering on GlobalBlocks. */
396 static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
398 Word r;
399 /* compare addrs */
400 if (gb1->addr < gb2->addr) return -1;
401 if (gb1->addr > gb2->addr) return 1;
402 /* compare sizes */
403 if (gb1->szB < gb2->szB) return -1;
404 if (gb1->szB > gb2->szB) return 1;
405 /* compare is/is-not array-typed flag */
406 if (gb1->isVec < gb2->isVec) return -1;
407 if (gb1->isVec > gb2->isVec) return 1;
408 /* compare the name */
409 r = (Word)VG_(strcmp)(gb1->name, gb2->name);
410 if (r != 0) return r;
411 /* compare the soname */
412 r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
413 return r;
416 static WordFM* /* GlobalBlock* -> nothing */
417 globalBlock_set = NULL;
419 static void init_GlobalBlock_set ( void )
421 tl_assert(!globalBlock_set);
422 globalBlock_set
423 = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
424 (Word(*)(UWord,UWord))GlobalBlock__cmp );
425 tl_assert(globalBlock_set);
429 /* Top level function for making GlobalBlocks persistent. Call here
430 with a non-persistent version, and the returned one is guaranteed
431 to be valid for the entire rest of the run. The supplied one is
432 copied, not stored, so can be freed after the call. */
434 static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
436 UWord key, val;
437 /* Now, do we have it already? */
438 if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
439 /* yes, return the copy */
440 GlobalBlock* res;
441 tl_assert(val == 0);
442 res = (GlobalBlock*)key;
443 tl_assert(res != orig);
444 return res;
445 } else {
446 /* no. clone it, store the clone and return the clone's
447 address. */
448 GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
449 sizeof(GlobalBlock) );
450 tl_assert(clone);
451 *clone = *orig;
452 VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
453 return clone;
458 //////////////////////////////////////////////////////////////
459 // //
460 // Interval tree of StackTreeBlock //
461 // //
462 //////////////////////////////////////////////////////////////
464 /* A node in a stack interval tree. Zero length intervals (.szB == 0)
465 are not allowed.
467 A stack interval tree is a (WordFM StackTreeNode* void). There is
468 one stack interval tree for each thread.
470 typedef
471 struct {
472 Addr addr;
473 SizeT szB; /* copied from .descr->szB */
474 StackBlock* descr; /* it's an instance of this block */
475 UWord depth; /* depth of stack at time block was pushed */
477 StackTreeNode;
479 static void pp_StackTree ( WordFM* sitree, const HChar* who )
481 UWord keyW, valW;
482 VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
483 VG_(initIterFM)( sitree );
484 while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
485 StackTreeNode* nd = (StackTreeNode*)keyW;
486 VG_(printf)(" [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
487 nd->descr, nd->descr->name, nd->descr->szB);
489 VG_(printf)(">>> END pp_StackTree %s\n", who );
492 /* Interval comparison function for StackTreeNode */
493 static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
494 StackTreeNode* sn2 )
496 return cmp_nonempty_intervals(sn1->addr, sn1->szB,
497 sn2->addr, sn2->szB);
500 /* Find the node holding 'a', if any. */
501 static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
503 UWord keyW, valW;
504 StackTreeNode key;
505 tl_assert(sitree);
506 key.addr = a;
507 key.szB = 1;
508 if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
509 StackTreeNode* res = (StackTreeNode*)keyW;
510 tl_assert(valW == 0);
511 tl_assert(res != &key);
512 return res;
513 } else {
514 return NULL;
518 /* Note that the supplied XArray of FrameBlock must have been
519 made persistent already. */
520 __attribute__((noinline))
521 static void add_blocks_to_StackTree (
522 /*MOD*/WordFM* sitree,
523 XArray* /* FrameBlock */ descrs,
524 XArray* /* Addr */ bases,
525 UWord depth
528 Bool debug = (Bool)0;
529 Word i, nDescrs, nBases;
531 nDescrs = VG_(sizeXA)( descrs ),
532 nBases = VG_(sizeXA)( bases );
533 tl_assert(nDescrs == nBases);
535 if (nDescrs == 0) return;
537 tl_assert(sitree);
538 if (debug) {
539 VG_(printf)("\ndepth = %lu\n", depth);
540 pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
541 pp_StackBlocks(descrs);
544 for (i = 0; i < nDescrs; i++) {
545 Bool already_present;
546 StackTreeNode* nyu;
547 Addr addr = *(Addr*)VG_(indexXA)( bases, i );
548 StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
549 tl_assert(descr->szB > 0);
550 nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
551 nyu->addr = addr;
552 nyu->szB = descr->szB;
553 nyu->descr = descr;
554 nyu->depth = depth;
555 if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
556 already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
557 /* The interval can't already be there; else we have
558 overlapping stack blocks. */
559 tl_assert(!already_present);
560 if (debug) {
561 pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
564 if (debug) {
565 pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
566 VG_(printf)("\n");
570 static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
571 XArray* /* Addr */ bases )
573 UWord oldK, oldV;
574 Word i, nBases = VG_(sizeXA)( bases );
575 for (i = 0; i < nBases; i++) {
576 Bool b;
577 Addr addr = *(Addr*)VG_(indexXA)( bases, i );
578 StackTreeNode* nd = find_StackTreeNode(sitree, addr);
579 /* The interval must be there; we added it earlier when
580 the associated frame was created. */
581 tl_assert(nd);
582 b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
583 /* we just found the block! */
584 tl_assert(b);
585 tl_assert(oldV == 0);
586 tl_assert(nd == (StackTreeNode*)oldK);
587 sg_free(nd);
592 static void delete_StackTree__kFin ( UWord keyW ) {
593 StackTreeNode* nd = (StackTreeNode*)keyW;
594 tl_assert(nd);
595 sg_free(nd);
597 static void delete_StackTree__vFin ( UWord valW ) {
598 tl_assert(valW == 0);
600 static void delete_StackTree ( WordFM* sitree )
602 VG_(deleteFM)( sitree,
603 delete_StackTree__kFin, delete_StackTree__vFin );
606 static WordFM* new_StackTree ( void ) {
607 return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
608 (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
612 //////////////////////////////////////////////////////////////
613 // //
614 // Interval tree of GlobalTreeBlock //
615 // //
616 //////////////////////////////////////////////////////////////
618 /* A node in a global interval tree. Zero length intervals
619 (.szB == 0) are not allowed.
621 A global interval tree is a (WordFM GlobalTreeNode* void). There
622 is one global interval tree for the entire process.
624 typedef
625 struct {
626 Addr addr; /* copied from .descr->addr */
627 SizeT szB; /* copied from .descr->szB */
628 GlobalBlock* descr; /* it's this block */
630 GlobalTreeNode;
632 static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
633 tl_assert(nd->descr);
634 VG_(printf)("GTNode [%#lx,+%lu) %s",
635 nd->addr, nd->szB, nd->descr->name);
638 static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
639 const HChar* who )
641 UWord keyW, valW;
642 GlobalTreeNode* nd;
643 VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
644 VG_(initIterFM)( gitree );
645 while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
646 tl_assert(valW == 0);
647 nd = (GlobalTreeNode*)keyW;
648 VG_(printf)(" ");
649 GlobalTreeNode__pp(nd);
650 VG_(printf)("\n");
652 VG_(doneIterFM)( gitree );
653 VG_(printf)(">>>\n");
656 /* Interval comparison function for GlobalTreeNode */
657 static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
658 GlobalTreeNode* gn2 )
660 return cmp_nonempty_intervals( gn1->addr, gn1->szB,
661 gn2->addr, gn2->szB );
664 /* Find the node holding 'a', if any. */
665 static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
667 UWord keyW, valW;
668 GlobalTreeNode key;
669 key.addr = a;
670 key.szB = 1;
671 if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
672 GlobalTreeNode* res = (GlobalTreeNode*)keyW;
673 tl_assert(valW == 0);
674 tl_assert(res != &key);
675 return res;
676 } else {
677 return NULL;
681 /* Note that the supplied GlobalBlock must have been made persistent
682 already. */
683 static void add_block_to_GlobalTree (
684 /*MOD*/WordFM* gitree,
685 GlobalBlock* descr
688 Bool already_present;
689 GlobalTreeNode *nyu, *nd;
690 UWord keyW, valW;
691 static Int moans = 3;
693 tl_assert(descr->szB > 0);
694 nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
695 nyu->addr = descr->addr;
696 nyu->szB = descr->szB;
697 nyu->descr = descr;
699 /* Basically it's an error to add a global block to the tree that
700 is already in the tree. However, detect and ignore attempts to
701 insert exact duplicates; they do appear for some reason
702 (possible a bug in m_debuginfo?) */
703 already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
704 if (already_present) {
705 tl_assert(valW == 0);
706 nd = (GlobalTreeNode*)keyW;
707 tl_assert(nd);
708 tl_assert(nd != nyu);
709 tl_assert(nd->descr);
710 tl_assert(nyu->descr);
711 if (nd->addr == nyu->addr && nd->szB == nyu->szB
712 /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
713 /* Although it seems reasonable to demand that duplicate
714 blocks have identical names, that is too strict. For
715 example, reading debuginfo from glibc produces two
716 otherwise identical blocks with names "tzname" and
717 "__tzname". A constraint of the form "must be identical,
718 or one must be a substring of the other" would fix that.
719 However, such trickery is scuppered by the fact that we
720 truncate all variable names to 15 characters to make
721 storage management simpler, hence giving pairs like
722 "__EI___pthread_" (truncated) vs "__pthread_keys". So
723 it's simplest just to skip the name comparison
724 completely. */
725 && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
726 /* exact duplicate; ignore it */
727 sg_free(nyu);
728 return;
730 /* else fall through; the assertion below will catch it */
733 already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
734 /* The interval can't already be there; else we have
735 overlapping global blocks. */
736 /* Unfortunately (25 Jan 09) at least icc11 has been seen to
737 generate overlapping block descriptions in the Dwarf3; clearly
738 bogus. */
739 if (already_present && moans > 0 && !VG_(clo_xml)) {
740 moans--;
741 VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
742 "overlapping global blocks\n");
743 if (VG_(clo_verbosity) >= 2) {
744 GlobalTree__pp( gitree,
745 "add_block_to_GlobalTree: non-exact duplicate" );
746 VG_(printf)("Overlapping block: ");
747 GlobalTreeNode__pp(nyu);
748 VG_(printf)("\n");
750 if (moans == 0)
751 VG_(message)(Vg_UserMsg, "Further instances of this "
752 "message will not be shown\n" );
754 /* tl_assert(!already_present); */
757 static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
758 Addr a, SizeT szB )
760 /* One easy way to do this: look up [a,a+szB) in the tree. That
761 will either succeed, producing a block which intersects that
762 range, in which case we delete it and repeat; or it will fail,
763 in which case there are no blocks intersecting the range, and we
764 can bring the process to a halt. */
765 UWord keyW, valW, oldK, oldV;
766 GlobalTreeNode key, *nd;
767 Bool b, anyFound;
769 tl_assert(szB > 0);
771 anyFound = False;
773 key.addr = a;
774 key.szB = szB;
776 while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
777 anyFound = True;
778 nd = (GlobalTreeNode*)keyW;
779 tl_assert(valW == 0);
780 tl_assert(nd != &key);
781 tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);
783 b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
784 tl_assert(b);
785 tl_assert(oldV == 0);
786 tl_assert(oldK == keyW); /* check we deleted the node we just found */
789 return anyFound;
793 //////////////////////////////////////////////////////////////
794 // //
795 // Invar //
796 // //
797 //////////////////////////////////////////////////////////////
799 /* An invariant, as resulting from watching the destination of a
800 memory referencing instruction. Initially is Inv_Unset until the
801 instruction makes a first access. */
803 typedef
804 enum {
805 Inv_Unset=1, /* not established yet */
806 Inv_Unknown, /* unknown location */
807 Inv_Stack0, /* array-typed stack block in innermost frame */
808 Inv_StackN, /* array-typed stack block in non-innermost frame */
809 Inv_Global, /* array-typed global block */
811 InvarTag;
813 typedef
814 struct {
815 InvarTag tag;
816 union {
817 struct {
818 } Unset;
819 struct {
820 } Unknown;
821 struct {
822 Addr addr;
823 SizeT szB;
824 StackBlock* descr;
825 } Stack0; /* innermost stack frame */
826 struct {
827 /* Pointer to a node in the interval tree for
828 this thread. */
829 StackTreeNode* nd;
830 } StackN; /* non-innermost stack frame */
831 struct {
832 /* Pointer to a GlobalBlock in the interval tree of
833 global blocks. */
834 GlobalTreeNode* nd;
835 } Global;
837 Inv;
839 Invar;
841 /* Partial debugging printing for an Invar. */
842 static void pp_Invar ( Invar* i )
844 switch (i->tag) {
845 case Inv_Unset:
846 VG_(printf)("Unset");
847 break;
848 case Inv_Unknown:
849 VG_(printf)("Unknown");
850 break;
851 case Inv_Stack0:
852 VG_(printf)("Stack0 [%#lx,+%lu)",
853 i->Inv.Stack0.addr, i->Inv.Stack0.szB);
854 break;
855 case Inv_StackN:
856 VG_(printf)("StackN [%#lx,+%lu)",
857 i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
858 break;
859 case Inv_Global:
860 VG_(printf)("Global [%#lx,+%lu)",
861 i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
862 break;
863 default:
864 tl_assert(0);
868 /* Compare two Invars for equality. */
869 static Bool eq_Invar ( Invar* i1, Invar* i2 )
871 if (i1->tag != i2->tag)
872 return False;
873 switch (i1->tag) {
874 case Inv_Unset:
875 return True;
876 case Inv_Unknown:
877 return True;
878 case Inv_Stack0:
879 return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
880 && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
881 case Inv_StackN:
882 return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
883 case Inv_Global:
884 return i1->Inv.Global.nd == i2->Inv.Global.nd;
885 default:
886 tl_assert(0);
888 /*NOTREACHED*/
889 tl_assert(0);
892 /* Generate a piece of text showing 'ea' is relative to 'invar', if
893 known. If unknown, generate an empty string. 'buf' must be at
894 least 32 bytes in size. Also return the absolute value of the
895 delta, if known, or zero if not known.
897 static void gen_delta_str ( /*OUT*/HChar* buf,
898 /*OUT*/UWord* absDelta,
899 Invar* inv, Addr ea )
901 Addr block = 0;
902 SizeT szB = 0;
904 buf[0] = 0;
905 *absDelta = 0;
907 switch (inv->tag) {
908 case Inv_Unknown:
909 case Inv_Unset:
910 return; /* unknown */
911 case Inv_Stack0:
912 block = inv->Inv.Stack0.addr;
913 szB = inv->Inv.Stack0.szB;
914 break;
915 case Inv_StackN:
916 block = inv->Inv.StackN.nd->addr;
917 szB = inv->Inv.StackN.nd->szB;
918 break;
919 case Inv_Global:
920 block = inv->Inv.Global.nd->addr;
921 szB = inv->Inv.Global.nd->szB;
922 break;
923 default:
924 tl_assert(0);
926 tl_assert(szB > 0);
927 if (ea < block) {
928 *absDelta = block - ea;
929 VG_(sprintf)(buf, "%'lu before", *absDelta);
931 else if (ea >= block + szB) {
932 *absDelta = ea - (block + szB);
933 VG_(sprintf)(buf, "%'lu after", *absDelta);
935 else {
936 // Leave *absDelta at zero.
937 VG_(sprintf)(buf, "%'lu inside", ea - block);
942 /* Print selected parts of an Invar, suitable for use in error
943 messages. */
944 static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
946 const HChar* str;
947 tl_assert(nBuf >= 128);
948 buf[0] = 0;
949 switch (inv->tag) {
950 case Inv_Unknown:
951 VG_(sprintf)(buf, "%s", "unknown");
952 break;
953 case Inv_Stack0:
954 str = "array";
955 VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in this frame",
956 str, inv->Inv.Stack0.descr->name,
957 inv->Inv.Stack0.szB );
958 break;
959 case Inv_StackN:
960 str = "array";
961 VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in frame %lu back from here",
962 str, inv->Inv.StackN.nd->descr->name,
963 inv->Inv.StackN.nd->descr->szB,
964 depth - inv->Inv.StackN.nd->depth );
965 break;
966 case Inv_Global:
967 str = "array";
968 VG_(sprintf)(buf, "global %s \"%s\" of size %'lu in object with soname \"%s\"",
969 str, inv->Inv.Global.nd->descr->name,
970 inv->Inv.Global.nd->descr->szB,
971 inv->Inv.Global.nd->descr->soname );
972 break;
973 case Inv_Unset:
974 VG_(sprintf)(buf, "%s", "Unset!");
975 break;
976 default:
977 tl_assert(0);
982 //////////////////////////////////////////////////////////////
983 // //
984 // our globals //
985 // //
986 //////////////////////////////////////////////////////////////
988 //////////////////////////////////////////////////////////////
991 #define N_QCACHE 16
993 /* Powers of two only, else the result will be chaos */
994 #define QCACHE_ADVANCE_EVERY 16
996 /* Per-thread query cache. Note that the invar can only be Inv_StackN
997 (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
998 typedef
999 struct {
1000 Addr addr;
1001 SizeT szB;
1002 Invar inv;
1004 QCElem;
1006 typedef
1007 struct {
1008 Word nInUse;
1009 QCElem elems[N_QCACHE];
1011 QCache;
1013 static void QCache__invalidate ( QCache* qc ) {
1014 tl_assert(qc->nInUse >= 0);
1015 qc->nInUse = 0;
1018 static void QCache__pp ( QCache* qc, const HChar* who )
1020 Word i;
1021 VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
1022 for (i = 0; i < qc->nInUse; i++) {
1023 VG_(printf)(" [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
1024 pp_Invar(&qc->elems[i].inv);
1025 VG_(printf)("\n");
1027 VG_(printf)(">>>\n");
1030 static ULong stats__qcache_queries = 0;
1031 static ULong stats__qcache_misses = 0;
1032 static ULong stats__qcache_probes = 0;
1035 //////////////////////////////////////////////////////////////
1037 /* Each thread has:
1038 * a shadow stack of StackFrames, which is a double-linked list
1039 * an stack block interval tree
1041 static struct _StackFrame** shadowStacks;
1043 static WordFM** /* StackTreeNode */ siTrees;
1045 static QCache* qcaches;
1048 /* Additionally, there is one global variable interval tree
1049 for the entire process.
1051 static WordFM* /* GlobalTreeNode */ giTree;
1054 static void invalidate_all_QCaches ( void )
1056 Word i;
1057 for (i = 0; i < VG_N_THREADS; i++) {
1058 QCache__invalidate( &qcaches[i] );
1062 static void ourGlobals_init ( void )
1064 Word i;
1066 shadowStacks = sg_malloc( "di.sg_main.oGi.2",
1067 VG_N_THREADS * sizeof shadowStacks[0] );
1068 siTrees = sg_malloc( "di.sg_main.oGi.3", VG_N_THREADS * sizeof siTrees[0] );
1069 qcaches = sg_malloc( "di.sg_main.oGi.4", VG_N_THREADS * sizeof qcaches[0] );
1071 for (i = 0; i < VG_N_THREADS; i++) {
1072 shadowStacks[i] = NULL;
1073 siTrees[i] = NULL;
1074 qcaches[i] = (QCache){};
1076 invalidate_all_QCaches();
1077 giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
1078 (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
1082 //////////////////////////////////////////////////////////////
1083 // //
1084 // Handle global variable load/unload events //
1085 // //
1086 //////////////////////////////////////////////////////////////
1088 static void acquire_globals ( ULong di_handle )
1090 Word n, i;
1091 XArray* /* of GlobalBlock */ gbs;
1092 if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
1093 gbs = VG_(di_get_global_blocks_from_dihandle)
1094 (di_handle, True/*arrays only*/);
1095 if (0) VG_(printf)(" GOT %ld globals\n", VG_(sizeXA)( gbs ));
1097 n = VG_(sizeXA)( gbs );
1098 for (i = 0; i < n; i++) {
1099 GlobalBlock* gbp;
1100 GlobalBlock* gb = VG_(indexXA)( gbs, i );
1101 if (0) VG_(printf)(" new Global size %2lu at %#lx: %s %s\n",
1102 gb->szB, gb->addr, gb->soname, gb->name );
1103 tl_assert(gb->szB > 0);
1104 /* Make a persistent copy of each GlobalBlock, and add it
1105 to the tree. */
1106 gbp = get_persistent_GlobalBlock( gb );
1107 add_block_to_GlobalTree( giTree, gbp );
1110 VG_(deleteXA)( gbs );
1114 /* We only intercept these two because we need to see any di_handles
1115 that might arise from the mappings/allocations. */
1116 void sg_new_mem_mmap( Addr a, SizeT len,
1117 Bool rr, Bool ww, Bool xx, ULong di_handle )
1119 if (di_handle > 0)
1120 acquire_globals(di_handle);
1122 void sg_new_mem_startup( Addr a, SizeT len,
1123 Bool rr, Bool ww, Bool xx, ULong di_handle )
1125 if (di_handle > 0)
1126 acquire_globals(di_handle);
1128 void sg_die_mem_munmap ( Addr a, SizeT len )
1130 Bool debug = (Bool)0;
1131 Bool overlap = False;
1133 if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );
1135 if (len == 0)
1136 return;
1138 overlap = del_GlobalTree_range(giTree, a, len);
1140 { /* redundant sanity check */
1141 UWord keyW, valW;
1142 VG_(initIterFM)( giTree );
1143 while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
1144 GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
1145 tl_assert(valW == 0);
1146 tl_assert(nd->szB > 0);
1147 tl_assert(nd->addr + nd->szB <= a
1148 || a + len <= nd->addr);
1150 VG_(doneIterFM)( giTree );
1153 if (!overlap)
1154 return;
1156 /* Ok, the range contained some blocks. Therefore we'll need to
1157 visit all the Invars in all the thread shadow stacks, and
1158 convert all Inv_Global entries that intersect [a,a+len) to
1159 Inv_Unknown. */
1160 tl_assert(len > 0);
1161 preen_global_Invars( a, len );
1162 invalidate_all_QCaches();
1166 //////////////////////////////////////////////////////////////
1167 // //
1168 // StackFrame //
1169 // //
1170 //////////////////////////////////////////////////////////////
1172 static ULong stats__total_accesses = 0;
1173 static ULong stats__classify_Stack0 = 0;
1174 static ULong stats__classify_StackN = 0;
1175 static ULong stats__classify_Global = 0;
1176 static ULong stats__classify_Unknown = 0;
1177 static ULong stats__Invars_preened = 0;
1178 static ULong stats__Invars_changed = 0;
1179 static ULong stats__t_i_b_empty = 0;
1180 static ULong stats__htab_fast = 0;
1181 static ULong stats__htab_searches = 0;
1182 static ULong stats__htab_probes = 0;
1183 static ULong stats__htab_resizes = 0;
1186 /* A dynamic instance of an instruction */
1187 typedef
1188 struct {
1189 /* IMMUTABLE */
1190 Addr insn_addr; /* NB! zero means 'not in use' */
1191 XArray* blocks; /* XArray* of StackBlock, or NULL if none */
1192 /* MUTABLE */
1193 Invar invar;
1195 IInstance;
1198 #define N_HTAB_FIXED 64
1200 typedef
1201 struct _StackFrame {
1202 /* The sp when the frame was created, so we know when to get rid
1203 of it. */
1204 Addr creation_sp;
1205 /* The stack frames for a thread are arranged as a doubly linked
1206 list. Obviously the outermost frame in the stack has .outer
1207 as NULL and the innermost in theory has .inner as NULL.
1208 However, when a function returns, we don't delete the
1209 just-vacated StackFrame. Instead, it is retained in the list
1210 and will be re-used when the next call happens. This is so
1211 as to avoid constantly having to dynamically allocate and
1212 deallocate frames. */
1213 struct _StackFrame* inner;
1214 struct _StackFrame* outer;
1215 Word depth; /* 0 for outermost; increases inwards */
1216 /* Information for each memory referencing instruction, for this
1217 instantiation of the function. The iinstances array is
1218 operated as a simple linear-probe hash table, which is
1219 dynamically expanded as necessary. Once critical thing is
1220 that an IInstance with a .insn_addr of zero is interpreted to
1221 mean that hash table slot is unused. This means we can't
1222 store an IInstance for address zero. */
1223 /* Note that htab initially points to htab_fixed. If htab_fixed
1224 turns out not to be big enough then htab is made to point to
1225 dynamically allocated memory. But it's often the case that
1226 htab_fixed is big enough, so this optimisation saves a huge
1227 number of sg_malloc/sg_free call pairs. */
1228 IInstance* htab;
1229 UWord htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
1230 UWord htab_used; /* number of hash table slots currently in use */
1231 /* If this frame is currently making a call, then the following
1232 are relevant. */
1233 Addr sp_at_call;
1234 Addr fp_at_call;
1235 XArray* /* of Addr */ blocks_added_by_call;
1236 /* See comment just above */
1237 IInstance htab_fixed[N_HTAB_FIXED];
1239 StackFrame;
1245 /* Move this somewhere else? */
1246 /* Visit all Invars in the entire system. If 'isHeap' is True, change
1247 all Inv_Heap Invars that intersect [a,a+len) to Inv_Unknown. If
1248 'isHeap' is False, do the same but to the Inv_Global{S,V} Invars
1249 instead. */
1251 __attribute__((noinline))
1252 static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
1254 stats__Invars_preened++;
1255 tl_assert(len > 0);
1256 tl_assert(inv);
1257 switch (inv->tag) {
1258 case Inv_Global:
1259 tl_assert(inv->Inv.Global.nd);
1260 tl_assert(inv->Inv.Global.nd->szB > 0);
1261 if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
1262 inv->Inv.Global.nd->addr,
1263 inv->Inv.Global.nd->szB);
1264 if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
1265 inv->Inv.Global.nd->szB)) {
1266 inv->tag = Inv_Unknown;
1267 stats__Invars_changed++;
1269 break;
1270 case Inv_Stack0:
1271 case Inv_StackN:
1272 case Inv_Unknown:
1273 break;
1274 case Inv_Unset: /* this should never happen */
1275 /* fallthrough */
1276 default:
1277 tl_assert(0);
1281 __attribute__((noinline))
1282 static void preen_global_Invars ( Addr a, SizeT len )
1284 Int i;
1285 UWord u;
1286 StackFrame* frame;
1287 tl_assert(len > 0);
1288 for (i = 0; i < VG_N_THREADS; i++) {
1289 frame = shadowStacks[i];
1290 if (!frame)
1291 continue; /* no frames for this thread */
1292 /* start from the innermost frame */
1293 while (frame->inner)
1294 frame = frame->inner;
1295 tl_assert(frame->outer);
1296 /* work through the frames from innermost to outermost. The
1297 order isn't important; we just need to ensure we visit each
1298 frame once (including those which are not actually active,
1299 more 'inner' than the 'innermost active frame', viz, just
1300 hanging around waiting to be used, when the current innermost
1301 active frame makes more calls. See comments on definition of
1302 struct _StackFrame. */
1303 for (; frame; frame = frame->outer) {
1304 UWord xx = 0; /* sanity check only; count of used htab entries */
1305 if (!frame->htab)
1306 continue; /* frame not in use. See shadowStack_unwind(). */
1307 for (u = 0; u < frame->htab_size; u++) {
1308 IInstance* ii = &frame->htab[u];
1309 if (ii->insn_addr == 0)
1310 continue; /* not in use */
1311 if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
1312 preen_global_Invar( &ii->invar, a, len );
1313 xx++;
1315 tl_assert(xx == frame->htab_used);
1321 /* XXX this should be >> 2 on ppc32/64 since the bottom two bits
1322 of the ip are guaranteed to be zero */
1323 inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
1324 return (ip >> 0) & (htab_size - 1);
1327 __attribute__((noinline))
1328 static void initialise_II_hash_table ( StackFrame* sf )
1330 UWord i;
1331 sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
1332 sf->htab = &sf->htab_fixed[0];
1333 tl_assert(sf->htab);
1334 sf->htab_used = 0;
1335 for (i = 0; i < sf->htab_size; i++)
1336 sf->htab[i].insn_addr = 0; /* NOT IN USE */
1340 __attribute__((noinline))
1341 static void resize_II_hash_table ( StackFrame* sf )
1343 UWord i, j, ix, old_size, new_size;
1344 IInstance *old_htab, *new_htab, *old;
1346 tl_assert(sf && sf->htab);
1347 old_size = sf->htab_size;
1348 new_size = 2 * old_size;
1349 old_htab = sf->htab;
1350 new_htab = sg_malloc( "di.sg_main.rIht.1",
1351 new_size * sizeof(IInstance) );
1352 for (i = 0; i < new_size; i++) {
1353 new_htab[i].insn_addr = 0; /* NOT IN USE */
1355 for (i = 0; i < old_size; i++) {
1356 old = &old_htab[i];
1357 if (old->insn_addr == 0 /* NOT IN USE */)
1358 continue;
1359 ix = compute_II_hash(old->insn_addr, new_size);
1360 /* find out where to put this, in the new table */
1361 j = new_size;
1362 while (1) {
1363 if (new_htab[ix].insn_addr == 0)
1364 break;
1365 /* This can't ever happen, because it would mean the new
1366 table is full; that isn't allowed -- even the old table is
1367 only allowed to become half full. */
1368 tl_assert(j > 0);
1369 j--;
1370 ix++; if (ix == new_size) ix = 0;
1372 /* copy the old entry to this location */
1373 tl_assert(ix < new_size);
1374 tl_assert(new_htab[ix].insn_addr == 0);
1375 new_htab[ix] = *old;
1376 tl_assert(new_htab[ix].insn_addr != 0);
1378 /* all entries copied; free old table. */
1379 if (old_htab != &sf->htab_fixed[0])
1380 sg_free(old_htab);
1381 sf->htab = new_htab;
1382 sf->htab_size = new_size;
1383 /* check sf->htab_used is correct. Optional and a bit expensive
1384 but anyway: */
1385 j = 0;
1386 for (i = 0; i < new_size; i++) {
1387 if (new_htab[i].insn_addr != 0) {
1388 j++;
1391 tl_assert(j == sf->htab_used);
1392 if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
1396 __attribute__((noinline))
1397 static IInstance* find_or_create_IInstance_SLOW (
1398 StackFrame* sf,
1399 Addr ip,
1400 XArray* /* StackBlock */ ip_frameblocks
1403 UWord i, ix;
1405 stats__htab_searches++;
1407 tl_assert(sf);
1408 tl_assert(sf->htab);
1410 /* Make sure the table loading doesn't get too high. */
1411 if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
1412 stats__htab_resizes++;
1413 resize_II_hash_table(sf);
1415 tl_assert(2 * sf->htab_used <= sf->htab_size);
1417 ix = compute_II_hash(ip, sf->htab_size);
1418 i = sf->htab_size;
1419 while (1) {
1420 stats__htab_probes++;
1421 /* Note that because of the way the fast-case handler works,
1422 these two tests are actually redundant in the first iteration
1423 of this loop. (Except they aren't redundant if the code just
1424 above resized the table first. :-) */
1425 if (sf->htab[ix].insn_addr == ip)
1426 return &sf->htab[ix];
1427 if (sf->htab[ix].insn_addr == 0)
1428 break;
1429 /* If i ever gets to zero and we have found neither what we're
1430 looking for nor an empty slot, the table must be full. Which
1431 isn't possible -- we monitor the load factor to ensure it
1432 doesn't get above say 50%; if that ever does happen the table
1433 is resized. */
1434 tl_assert(i > 0);
1435 i--;
1436 ix++;
1437 if (ix == sf->htab_size) ix = 0;
1440 /* So now we've found a free slot at ix, and we can use that. */
1441 tl_assert(sf->htab[ix].insn_addr == 0);
1443 /* Add a new record in this slot. */
1444 tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
1445 sf->htab[ix].insn_addr = ip;
1446 sf->htab[ix].blocks = ip_frameblocks;
1447 sf->htab[ix].invar.tag = Inv_Unset;
1448 sf->htab_used++;
1449 return &sf->htab[ix];
1453 inline
1454 static IInstance* find_or_create_IInstance (
1455 StackFrame* sf,
1456 Addr ip,
1457 XArray* /* StackBlock */ ip_frameblocks
1460 UWord ix = compute_II_hash(ip, sf->htab_size);
1461 /* Is it in the first slot we come to? */
1462 if (LIKELY(sf->htab[ix].insn_addr == ip)) {
1463 stats__htab_fast++;
1464 return &sf->htab[ix];
1466 /* If the first slot we come to is empty, bag it. */
1467 if (LIKELY(sf->htab[ix].insn_addr == 0)) {
1468 stats__htab_fast++;
1469 tl_assert(ip != 0);
1470 sf->htab[ix].insn_addr = ip;
1471 sf->htab[ix].blocks = ip_frameblocks;
1472 sf->htab[ix].invar.tag = Inv_Unset;
1473 sf->htab_used++;
1474 return &sf->htab[ix];
1476 /* Otherwise we hand off to the slow case, which searches other
1477 slots, and optionally resizes the table if necessary. */
1478 return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
1482 __attribute__((noinline))
1483 static Addr calculate_StackBlock_EA ( StackBlock* descr,
1484 Addr sp, Addr fp ) {
1485 UWord w1 = (UWord)descr->base;
1486 UWord w2 = (UWord)(descr->spRel ? sp : fp);
1487 UWord ea = w1 + w2;
1488 return ea;
1491 /* Given an array of StackBlocks, return an array of Addrs, holding
1492 their effective addresses. Caller deallocates result array. */
1493 __attribute__((noinline))
1494 static XArray* /* Addr */ calculate_StackBlock_EAs (
1495 XArray* /* StackBlock */ blocks,
1496 Addr sp, Addr fp
1499 XArray* res;
1500 Word i, n = VG_(sizeXA)( blocks );
1501 tl_assert(n > 0);
1502 res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
1503 for (i = 0; i < n; i++) {
1504 StackBlock* blk = VG_(indexXA)( blocks, i );
1505 Addr ea = calculate_StackBlock_EA( blk, sp, fp );
1506 VG_(addToXA)( res, &ea );
1508 return res;
1512 /* Try to classify the block into which a memory access falls, and
1513 write the result in 'inv'. This writes all relevant fields of
1514 'inv'. */
1515 __attribute__((noinline))
1516 static void classify_address ( /*OUT*/Invar* inv,
1517 ThreadId tid,
1518 Addr ea, Addr sp, Addr fp,
1519 UWord szB,
1520 XArray* /* of StackBlock */ thisInstrBlocks )
1522 tl_assert(szB > 0);
1523 /* First, look in the stack blocks accessible in this instruction's
1524 frame. */
1526 Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
1527 if (nBlocks == 0) stats__t_i_b_empty++;
1528 for (i = 0; i < nBlocks; i++) {
1529 StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
1530 Addr bea = calculate_StackBlock_EA( descr, sp, fp );
1531 if (bea <= ea && ea + szB <= bea + descr->szB) {
1532 /* found it */
1533 inv->tag = Inv_Stack0;
1534 inv->Inv.Stack0.addr = bea;
1535 inv->Inv.Stack0.szB = descr->szB;
1536 inv->Inv.Stack0.descr = descr;
1537 stats__classify_Stack0++;
1538 return;
1542 /* Look in this thread's query cache */
1543 { Word i;
1544 QCache* cache = &qcaches[tid];
1545 static UWord ctr = 0;
1546 stats__qcache_queries++;
1547 for (i = 0; i < cache->nInUse; i++) {
1548 if (0) /* expensive in a loop like this */
1549 tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
1550 stats__qcache_probes++;
1551 if (is_subinterval_of(cache->elems[i].addr,
1552 cache->elems[i].szB, ea, szB)) {
1553 if (i > 0
1554 && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
1555 QCElem tmp;
1556 tmp = cache->elems[i-1];
1557 cache->elems[i-1] = cache->elems[i];
1558 cache->elems[i] = tmp;
1559 i--;
1561 *inv = cache->elems[i].inv;
1562 return;
1565 stats__qcache_misses++;
1567 /* Ok, so it's not a block in the top frame. Perhaps it's a block
1568 in some calling frame? Consult this thread's stack-block
1569 interval tree to find out. */
1570 { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
1571 /* We know that [ea,ea+1) is in the block, but we need to
1572 restrict to the case where the whole access falls within
1573 it. */
1574 if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1575 nd = NULL;
1577 if (nd) {
1578 /* found it */
1579 inv->tag = Inv_StackN;
1580 inv->Inv.StackN.nd = nd;
1581 stats__classify_StackN++;
1582 goto out;
1585 /* Not in a stack block. Try the global pool. */
1586 { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
1587 /* We know that [ea,ea+1) is in the block, but we need to
1588 restrict to the case where the whole access falls within
1589 it. */
1590 if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1591 nd = NULL;
1593 if (nd) {
1594 /* found it */
1595 inv->tag = Inv_Global;
1596 inv->Inv.Global.nd = nd;
1597 stats__classify_Global++;
1598 goto out;
1601 /* No idea - give up. */
1602 inv->tag = Inv_Unknown;
1603 stats__classify_Unknown++;
1605 /* Update the cache */
1606 out:
1607 { Addr toadd_addr = 0;
1608 SizeT toadd_szB = 0;
1609 QCache* cache = &qcaches[tid];
1611 static UWord ctr = 0;
1612 Bool show = False;
1613 if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;
1615 if (show) QCache__pp(cache, "before upd");
1617 switch (inv->tag) {
1618 case Inv_Global:
1619 toadd_addr = inv->Inv.Global.nd->addr;
1620 toadd_szB = inv->Inv.Global.nd->szB;
1621 break;
1622 case Inv_StackN:
1623 toadd_addr = inv->Inv.StackN.nd->addr;
1624 toadd_szB = inv->Inv.StackN.nd->szB;
1625 break;
1626 case Inv_Unknown: {
1627 /* This is more complex. We need to figure out the
1628 intersection of the "holes" in the global and stack
1629 interval trees into which [ea,ea+szB) falls. This is
1630 further complicated by the fact that [ea,ea+szB) might
1631 not fall cleanly into a hole; it may instead fall across
1632 the boundary of a stack or global block. In that case
1633 we just ignore it and don't update the cache, since we
1634 have no way to represent this situation precisely. */
1635 StackTreeNode sNegInf, sPosInf, sKey, *sLB, *sUB;
1636 GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
1637 Addr gMin, gMax, sMin, sMax, uMin, uMax;
1638 Bool sOK, gOK;
1639 sNegInf.addr = 0;
1640 sNegInf.szB = 1;
1641 sPosInf.addr = ~(UWord)0;
1642 sPosInf.szB = 1;
1643 gNegInf.addr = 0;
1644 gNegInf.szB = 1;
1645 gPosInf.addr = ~(UWord)0;
1646 gPosInf.szB = 1;
1647 sKey.addr = ea;
1648 sKey.szB = szB;
1649 gKey.addr = ea;
1650 gKey.szB = szB;
1651 if (0) VG_(printf)("Tree sizes %lu %lu\n",
1652 VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
1653 sOK = VG_(findBoundsFM)( siTrees[tid],
1654 (UWord*)&sLB, NULL/*unused*/,
1655 (UWord*)&sUB, NULL/*unused*/,
1656 (UWord)&sNegInf, 0/*unused*/,
1657 (UWord)&sPosInf, 0/*unused*/,
1658 (UWord)&sKey );
1659 gOK = VG_(findBoundsFM)( giTree,
1660 (UWord*)&gLB, NULL/*unused*/,
1661 (UWord*)&gUB, NULL/*unused*/,
1662 (UWord)&gNegInf, 0/*unused*/,
1663 (UWord)&gPosInf, 0/*unused*/,
1664 (UWord)&gKey );
1665 if (!(sOK && gOK)) {
1666 /* If this happens, then [ea,ea+szB) partially overlaps
1667 a heap or stack block. We can't represent that, so
1668 just forget it (should be very rare). However, do
1669 maximum sanity checks first. In such a
1670 partial overlap case, it can't be the case that both
1671 [ea] and [ea+szB-1] overlap the same block, since if
1672 that were indeed the case then it wouldn't be a
1673 partial overlap; rather it would simply fall inside
1674 that block entirely and we shouldn't be inside this
1675 conditional at all. */
1676 if (!sOK) {
1677 StackTreeNode *ndFirst, *ndLast;
1678 ndFirst = find_StackTreeNode( siTrees[tid], ea );
1679 ndLast = find_StackTreeNode( siTrees[tid], ea+szB-1 );
1680 /* if both ends of the range fall inside a block,
1681 they can't be in the same block. */
1682 if (ndFirst && ndLast)
1683 tl_assert(ndFirst != ndLast);
1684 /* for each end of the range, if it is in a block,
1685 the range as a whole can't be entirely within the
1686 block. */
1687 if (ndFirst)
1688 tl_assert(!is_subinterval_of(ndFirst->addr,
1689 ndFirst->szB, ea, szB));
1690 if (ndLast)
1691 tl_assert(!is_subinterval_of(ndLast->addr,
1692 ndLast->szB, ea, szB));
1694 if (!gOK) {
1695 GlobalTreeNode *ndFirst, *ndLast;
1696 ndFirst = find_GlobalTreeNode( giTree, ea );
1697 ndLast = find_GlobalTreeNode( giTree, ea+szB-1 );
1698 /* if both ends of the range fall inside a block,
1699 they can't be in the same block. */
1700 if (ndFirst && ndLast)
1701 tl_assert(ndFirst != ndLast);
1702 /* for each end of the range, if it is in a block,
1703 the range as a whole can't be entirely within the
1704 block. */
1705 if (ndFirst)
1706 tl_assert(!is_subinterval_of(ndFirst->addr,
1707 ndFirst->szB, ea, szB));
1708 if (ndLast)
1709 tl_assert(!is_subinterval_of(ndLast->addr,
1710 ndLast->szB, ea, szB));
1712 if (0) VG_(printf)("overlapping blocks in cache\n");
1713 return;
1715 sMin = sLB == &sNegInf ? 0 : (sLB->addr + sLB->szB);
1716 sMax = sUB == &sPosInf ? ~(UWord)0 : (sUB->addr - 1);
1717 gMin = gLB == &gNegInf ? 0 : (gLB->addr + gLB->szB);
1718 gMax = gUB == &gPosInf ? ~(UWord)0 : (gUB->addr - 1);
1719 if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
1720 sMin, sMax, gMin, gMax);
1721 /* [sMin,sMax] and [gMin,gMax] must both contain
1722 [ea,ea+szB) (right?) That implies they must overlap at
1723 at least over [ea,ea+szB). */
1724 tl_assert(sMin <= ea && ea+szB-1 <= sMax);
1725 tl_assert(gMin <= ea && ea+szB-1 <= gMax);
1726 /* So now compute their intersection. */
1727 uMin = Addr__max( sMin, gMin );
1728 uMax = Addr__min( sMax, gMax );
1729 if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
1730 tl_assert(uMin <= uMax);
1731 tl_assert(uMin <= ea && ea+szB-1 <= uMax);
1732 /* Finally, we can park [uMin,uMax] in the cache. However,
1733 if uMax is ~0, we can't represent the difference; hence
1734 fudge uMax. */
1735 if (uMin < uMax && uMax == ~(UWord)0)
1736 uMax--;
1737 toadd_addr = uMin;
1738 toadd_szB = uMax - uMin + 1;
1739 break;
1741 default:
1742 /* We should only be caching info for the above 3 cases */
1743 tl_assert(0);
1744 } /* switch (inv->tag) */
1746 { /* and actually add this to the cache, finally */
1747 Word i;
1748 Word ip = cache->nInUse / 2; /* doesn't seem critical */
1750 if (cache->nInUse < N_QCACHE)
1751 cache->nInUse++;
1752 for (i = cache->nInUse-1; i > ip; i--) {
1753 cache->elems[i] = cache->elems[i-1];
1756 tl_assert(toadd_szB > 0);
1757 cache->elems[ip].addr = toadd_addr;
1758 cache->elems[ip].szB = toadd_szB;
1759 cache->elems[ip].inv = *inv;
1762 if (show) QCache__pp(cache, "after upd");
1768 /* CALLED FROM GENERATED CODE */
1769 static
1770 VG_REGPARM(3)
1771 void helperc__mem_access ( /* Known only at run time: */
1772 Addr ea, Addr sp, Addr fp,
1773 /* Known at translation time: */
1774 Word sszB, Addr ip, XArray* ip_frameBlocks )
1776 UWord szB;
1777 IInstance* iinstance;
1778 Invar* inv;
1779 Invar new_inv;
1780 ThreadId tid = VG_(get_running_tid)();
1781 StackFrame* frame;
1782 HChar bufE[160], bufA[160], bufD[32];
1784 stats__total_accesses++;
1786 tl_assert(is_sane_TId(tid));
1787 frame = shadowStacks[tid];
1788 tl_assert(frame);
1790 /* Find the instance info for this instruction. */
1791 tl_assert(ip_frameBlocks);
1792 iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
1793 tl_assert(iinstance);
1794 tl_assert(iinstance->blocks == ip_frameBlocks);
1796 szB = (sszB < 0) ? (-sszB) : sszB;
1797 tl_assert(szB > 0);
1799 inv = &iinstance->invar;
1801 /* Deal with first uses of instruction instances. */
1802 if (inv->tag == Inv_Unset) {
1803 /* This is the first use of this instance of the instruction, so
1804 we can't make any check; we merely record what we saw, so we
1805 can compare it against what happens for 2nd and subsequent
1806 accesses. */
1807 classify_address( inv,
1808 tid, ea, sp, fp, szB, iinstance->blocks );
1809 tl_assert(inv->tag != Inv_Unset);
1810 return;
1813 /* So generate an Invar and see if it's different from what
1814 we had before. */
1815 classify_address( &new_inv,
1816 tid, ea, sp, fp, szB, iinstance->blocks );
1817 tl_assert(new_inv.tag != Inv_Unset);
1819 /* Did we see something different from before? If no, then there's
1820 no error. */
1821 tl_assert(inv->tag != Inv_Unset);
1823 if (LIKELY(eq_Invar(&new_inv, inv)))
1824 return;
1826 VG_(memset)(bufE, 0, sizeof(bufE));
1827 show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );
1829 VG_(memset)(bufA, 0, sizeof(bufA));
1830 show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );
1832 VG_(memset)(bufD, 0, sizeof(bufD));
1833 UWord absDelta;
1834 gen_delta_str( bufD, &absDelta, inv, ea );
1836 if (absDelta < 1024)
1837 sg_record_error_SorG( tid, ea, sszB, bufE, bufA, bufD );
1839 /* And now install the new observation as "standard", so as to
1840 make future error messages make more sense. */
1841 *inv = new_inv;
1845 ////////////////////////////////////////
1846 /* Primary push-a-new-frame routine. Called indirectly from
1847 generated code. */
1849 static UWord stats__max_sitree_size = 0;
1850 static UWord stats__max_gitree_size = 0;
1852 static
1853 void shadowStack_new_frame ( ThreadId tid,
1854 Addr sp_at_call_insn,
1855 Addr sp_post_call_insn,
1856 Addr fp_at_call_insn,
1857 Addr ip_post_call_insn,
1858 XArray* descrs_at_call_insn )
1860 StackFrame *callee, *caller;
1861 tl_assert(is_sane_TId(tid));
1863 caller = shadowStacks[tid];
1864 tl_assert(caller);
1866 if (caller->outer) { /* "this is not the outermost frame" */
1867 tl_assert(caller->outer->inner == caller);
1868 tl_assert(caller->outer->depth >= 0);
1869 tl_assert(1 + caller->outer->depth == caller->depth);
1870 } else {
1871 tl_assert(caller->depth == 0);
1874 caller->sp_at_call = sp_at_call_insn;
1875 caller->fp_at_call = fp_at_call_insn;
1877 if (descrs_at_call_insn) {
1878 tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
1879 caller->blocks_added_by_call
1880 = calculate_StackBlock_EAs( descrs_at_call_insn,
1881 sp_at_call_insn, fp_at_call_insn );
1882 if (caller->blocks_added_by_call)
1883 add_blocks_to_StackTree( siTrees[tid],
1884 descrs_at_call_insn,
1885 caller->blocks_added_by_call,
1886 caller->depth /* stack depth at which
1887 these blocks are
1888 considered to exist*/ );
1889 if (1) {
1890 UWord s = VG_(sizeFM)( siTrees[tid] );
1891 UWord g = VG_(sizeFM)( giTree );
1892 Bool sb = s > stats__max_sitree_size;
1893 Bool gb = g > stats__max_gitree_size;
1894 if (sb) stats__max_sitree_size = s;
1895 if (gb) stats__max_gitree_size = g;
1896 if (0 && (sb || gb))
1897 VG_(message)(Vg_DebugMsg,
1898 "exp-sgcheck: new max tree sizes: "
1899 "StackTree %lu, GlobalTree %lu\n",
1900 stats__max_sitree_size, stats__max_gitree_size );
1902 } else {
1903 caller->blocks_added_by_call = NULL;
1906 /* caller->blocks_added_by_call is used again (and then freed) when
1907 this frame is removed from the stack. */
1909 if (caller->inner) {
1910 callee = caller->inner;
1911 } else {
1912 callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
1913 VG_(memset)(callee, 0, sizeof(StackFrame));
1914 callee->outer = caller;
1915 caller->inner = callee;
1916 callee->depth = 1 + caller->depth;
1917 tl_assert(callee->inner == NULL);
1920 /* This sets up .htab, .htab_size and .htab_used */
1921 initialise_II_hash_table( callee );
1923 callee->creation_sp = sp_post_call_insn;
1924 callee->sp_at_call = 0; // not actually required ..
1925 callee->fp_at_call = 0; // .. these 3 initialisations are ..
1926 callee->blocks_added_by_call = NULL; // .. just for cleanness
1928 /* record the new running stack frame */
1929 shadowStacks[tid] = callee;
1931 /* and this thread's query cache is now invalid */
1932 QCache__invalidate( &qcaches[tid] );
1934 if (0)
1935 { Word d = callee->depth;
1936 const HChar *fnname;
1937 Bool ok;
1938 Addr ip = ip_post_call_insn;
1939 DiEpoch ep = VG_(current_DiEpoch)();
1940 ok = VG_(get_fnname_w_offset)( ep, ip, &fnname );
1941 while (d > 0) {
1942 VG_(printf)(" ");
1943 d--;
1945 VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
1949 /* CALLED FROM GENERATED CODE */
1950 static
1951 VG_REGPARM(3)
1952 void helperc__new_frame ( Addr sp_post_call_insn,
1953 Addr fp_at_call_insn,
1954 Addr ip_post_call_insn,
1955 XArray* blocks_at_call_insn,
1956 Word sp_adjust )
1958 ThreadId tid = VG_(get_running_tid)();
1959 Addr sp_at_call_insn = sp_post_call_insn + sp_adjust;
1960 shadowStack_new_frame( tid,
1961 sp_at_call_insn,
1962 sp_post_call_insn,
1963 fp_at_call_insn,
1964 ip_post_call_insn,
1965 blocks_at_call_insn );
1969 ////////////////////////////////////////
1970 /* Primary remove-frame(s) routine. Called indirectly from
1971 generated code. */
1973 __attribute__((noinline))
1974 static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
1976 StackFrame *innermost, *innermostOrig;
1977 tl_assert(is_sane_TId(tid));
1978 innermost = shadowStacks[tid];
1979 tl_assert(innermost);
1980 innermostOrig = innermost;
1981 //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
1982 while (1) {
1983 if (!innermost->outer)
1984 break;
1985 if (innermost->inner)
1986 tl_assert(innermost->inner->outer == innermost);
1987 tl_assert(innermost->outer->inner == innermost);
1988 tl_assert(innermost->blocks_added_by_call == NULL);
1989 if (sp_now <= innermost->creation_sp) break;
1990 //VG_(printf)("UNWIND dump %p\n", innermost->creation_sp);
1991 tl_assert(innermost->htab);
1992 if (innermost->htab != &innermost->htab_fixed[0])
1993 sg_free(innermost->htab);
1994 /* be on the safe side */
1995 innermost->creation_sp = 0;
1996 innermost->htab = NULL;
1997 innermost->htab_size = 0;
1998 innermost->htab_used = 0;
1999 innermost->sp_at_call = 0;
2000 innermost->fp_at_call = 0;
2001 innermost->blocks_added_by_call = NULL;
2002 innermost = innermost->outer;
2004 /* So now we're "back" in the calling frame. Remove from this
2005 thread's stack-interval-tree, the blocks added at the time of
2006 the call. */
2008 if (innermost->outer) { /* not at the outermost frame */
2009 if (innermost->blocks_added_by_call == NULL) {
2010 } else {
2011 del_blocks_from_StackTree( siTrees[tid],
2012 innermost->blocks_added_by_call );
2013 VG_(deleteXA)( innermost->blocks_added_by_call );
2014 innermost->blocks_added_by_call = NULL;
2017 /* That completes the required tidying of the interval tree
2018 associated with the frame we just removed. */
2020 if (0) {
2021 Word d = innermost->depth;
2022 while (d > 0) {
2023 VG_(printf)(" ");
2024 d--;
2026 VG_(printf)("X\n");
2031 tl_assert(innermost);
2033 if (innermost != innermostOrig) {
2034 shadowStacks[tid] = innermost;
2035 /* this thread's query cache is now invalid */
2036 QCache__invalidate( &qcaches[tid] );
2042 //////////////////////////////////////////////////////////////
2043 // //
2044 // Instrumentation //
2045 // //
2046 //////////////////////////////////////////////////////////////
2048 /* What does instrumentation need to do?
2050 - at each Call transfer, generate a call to shadowStack_new_frame
2051 do this by manually inspecting the IR
2053 - at each sp change, if the sp change is negative,
2054 call shadowStack_unwind
2055 do this by asking for SP-change analysis
2057 - for each memory referencing instruction,
2058 call helperc__mem_access
2061 /* A complication: sg_ instrumentation and h_ instrumentation need to
2062 be interleaved. Since the latter is a lot more complex than the
2063 former, we split the sg_ instrumentation here into four functions
2064 and let the h_ instrumenter call the four functions as it goes.
2065 Hence the h_ instrumenter drives the sg_ instrumenter.
2067 To make this viable, the sg_ instrumenter carries what running
2068 state it needs in 'struct _SGEnv'. This is exported only
2069 abstractly from this file.
2072 struct _SGEnv {
2073 /* the current insn's IP */
2074 Addr curr_IP;
2075 /* whether the above is actually known */
2076 Bool curr_IP_known;
2077 /* if we find a mem ref, is it the first for this insn? Used for
2078 detecting insns which make more than one memory ref, a situation
2079 we basically can't really handle properly; and so we ignore all
2080 but the first ref. */
2081 Bool firstRef;
2082 /* READONLY */
2083 IRTemp (*newIRTemp_cb)(IRType,void*);
2084 void* newIRTemp_opaque;
2088 /* --- Helper fns for instrumentation --- */
2090 static IRTemp gen_Get_SP ( struct _SGEnv* sge,
2091 IRSB* bbOut,
2092 const VexGuestLayout* layout,
2093 Int hWordTy_szB )
2095 IRExpr* sp_expr;
2096 IRTemp sp_temp;
2097 IRType sp_type;
2098 /* This in effect forces the host and guest word sizes to be the
2099 same. */
2100 tl_assert(hWordTy_szB == layout->sizeof_SP);
2101 sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
2102 sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
2103 sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
2104 addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
2105 return sp_temp;
2108 static IRTemp gen_Get_FP ( struct _SGEnv* sge,
2109 IRSB* bbOut,
2110 const VexGuestLayout* layout,
2111 Int hWordTy_szB )
2113 IRExpr* fp_expr;
2114 IRTemp fp_temp;
2115 IRType fp_type;
2116 /* This in effect forces the host and guest word sizes to be the
2117 same. */
2118 tl_assert(hWordTy_szB == layout->sizeof_SP);
2119 fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
2120 fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
2121 fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
2122 addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
2123 return fp_temp;
2126 static void instrument_mem_access ( struct _SGEnv* sge,
2127 IRSB* bbOut,
2128 IRExpr* addr,
2129 Int szB,
2130 Bool isStore,
2131 Int hWordTy_szB,
2132 Addr curr_IP,
2133 const VexGuestLayout* layout )
2135 IRType tyAddr = Ity_INVALID;
2136 XArray* frameBlocks = NULL;
2138 tl_assert(isIRAtom(addr));
2139 tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);
2141 tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
2142 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2144 #if defined(VGA_x86)
2145 { UChar* p = (UChar*)curr_IP;
2146 // pop %ebp; RET
2147 if (p[0] == 0xc3 && p[-1] == 0x5d) return;
2148 // pop %ebp; RET $imm16
2149 if (p[0] == 0xc2 && p[-1] == 0x5d) return;
2150 // PUSH %EBP; mov %esp,%ebp
2151 if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
2153 #endif
2155 /* First off, find or create the StackBlocks for this instruction. */
2156 frameBlocks = get_StackBlocks_for_IP( curr_IP );
2157 tl_assert(frameBlocks);
2158 //if (VG_(sizeXA)( frameBlocks ) == 0)
2159 // frameBlocks = NULL;
2161 /* Generate a call to "helperc__mem_access", passing:
2162 addr current_SP current_FP szB curr_IP frameBlocks
2164 { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
2165 IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
2166 IRExpr** args
2167 = mkIRExprVec_6( addr,
2168 IRExpr_RdTmp(t_SP),
2169 IRExpr_RdTmp(t_FP),
2170 mkIRExpr_HWord( isStore ? (-szB) : szB ),
2171 mkIRExpr_HWord( curr_IP ),
2172 mkIRExpr_HWord( (HWord)frameBlocks ) );
2173 IRDirty* di
2174 = unsafeIRDirty_0_N( 3/*regparms*/,
2175 "helperc__mem_access",
2176 VG_(fnptr_to_fnentry)( &helperc__mem_access ),
2177 args );
2179 addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
2184 /* --- Instrumentation main (4 fns) --- */
2186 struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
2187 void* newIRTemp_opaque )
2189 struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
2190 sizeof(struct _SGEnv));
2191 tl_assert(env);
2192 env->curr_IP = 0;
2193 env->curr_IP_known = False;
2194 env->firstRef = True;
2195 env->newIRTemp_cb = newIRTemp_cb;
2196 env->newIRTemp_opaque = newIRTemp_opaque;
2197 return env;
2200 void sg_instrument_fini ( struct _SGEnv * env )
2202 sg_free(env);
2205 /* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
2206 as required. This must be called before 'st' itself is added to
2207 'sbOut'. */
2208 void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
2209 /*MOD*/IRSB* sbOut,
2210 IRStmt* st,
2211 const VexGuestLayout* layout,
2212 IRType gWordTy, IRType hWordTy )
2214 if (!sg_clo_enable_sg_checks)
2215 return;
2217 tl_assert(st);
2218 tl_assert(isFlatIRStmt(st));
2219 switch (st->tag) {
2220 case Ist_NoOp:
2221 case Ist_AbiHint:
2222 case Ist_Put:
2223 case Ist_PutI:
2224 case Ist_MBE:
2225 /* None of these can contain any memory references. */
2226 break;
2228 case Ist_Exit:
2229 tl_assert(st->Ist.Exit.jk != Ijk_Call);
2230 /* else we must deal with a conditional call */
2231 break;
2233 case Ist_IMark:
2234 env->curr_IP_known = True;
2235 env->curr_IP = st->Ist.IMark.addr;
2236 env->firstRef = True;
2237 break;
2239 case Ist_Store:
2240 tl_assert(env->curr_IP_known);
2241 if (env->firstRef) {
2242 instrument_mem_access(
2243 env, sbOut,
2244 st->Ist.Store.addr,
2245 sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
2246 True/*isStore*/,
2247 sizeofIRType(hWordTy),
2248 env->curr_IP, layout
2250 env->firstRef = False;
2252 break;
2254 case Ist_WrTmp: {
2255 IRExpr* data = st->Ist.WrTmp.data;
2256 if (data->tag == Iex_Load) {
2257 tl_assert(env->curr_IP_known);
2258 if (env->firstRef) {
2259 instrument_mem_access(
2260 env, sbOut,
2261 data->Iex.Load.addr,
2262 sizeofIRType(data->Iex.Load.ty),
2263 False/*!isStore*/,
2264 sizeofIRType(hWordTy),
2265 env->curr_IP, layout
2267 env->firstRef = False;
2270 break;
2273 case Ist_Dirty: {
2274 Int dataSize;
2275 IRDirty* d = st->Ist.Dirty.details;
2276 if (d->mFx != Ifx_None) {
2277 /* This dirty helper accesses memory. Collect the
2278 details. */
2279 tl_assert(env->curr_IP_known);
2280 if (env->firstRef) {
2281 tl_assert(d->mAddr != NULL);
2282 tl_assert(d->mSize != 0);
2283 dataSize = d->mSize;
2284 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2285 instrument_mem_access(
2286 env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
2287 sizeofIRType(hWordTy), env->curr_IP, layout
2290 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2291 instrument_mem_access(
2292 env, sbOut, d->mAddr, dataSize, True/*isStore*/,
2293 sizeofIRType(hWordTy), env->curr_IP, layout
2296 env->firstRef = False;
2298 } else {
2299 tl_assert(d->mAddr == NULL);
2300 tl_assert(d->mSize == 0);
2302 break;
2305 case Ist_CAS: {
2306 /* We treat it as a read and a write of the location. I
2307 think that is the same behaviour as it was before IRCAS
2308 was introduced, since prior to that point, the Vex front
2309 ends would translate a lock-prefixed instruction into a
2310 (normal) read followed by a (normal) write. */
2311 if (env->firstRef) {
2312 Int dataSize;
2313 IRCAS* cas = st->Ist.CAS.details;
2314 tl_assert(cas->addr != NULL);
2315 tl_assert(cas->dataLo != NULL);
2316 dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
2317 if (cas->dataHi != NULL)
2318 dataSize *= 2; /* since it's a doubleword-CAS */
2319 instrument_mem_access(
2320 env, sbOut, cas->addr, dataSize, False/*!isStore*/,
2321 sizeofIRType(hWordTy), env->curr_IP, layout
2323 instrument_mem_access(
2324 env, sbOut, cas->addr, dataSize, True/*isStore*/,
2325 sizeofIRType(hWordTy), env->curr_IP, layout
2327 env->firstRef = False;
2329 break;
2332 case Ist_LoadG: {
2333 IRLoadG* lg = st->Ist.LoadG.details;
2334 IRType type = Ity_INVALID; /* loaded type */
2335 IRType typeWide = Ity_INVALID; /* after implicit widening */
2336 IRExpr* addr = lg->addr;
2337 typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
2338 tl_assert(type != Ity_INVALID);
2339 instrument_mem_access(
2340 env, sbOut, addr, sizeofIRType(type), False/*isStore*/,
2341 sizeofIRType(hWordTy), env->curr_IP, layout
2343 break;
2346 default:
2347 tl_assert(0);
2349 } /* switch (st->tag) */
2353 /* Add instrumentation for the final jump of an IRSB 'sbOut', and
2354 possibly modify 'env' as required. This must be the last
2355 instrumentation statement in the block. */
2356 void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
2357 /*MOD*/IRSB* sbOut,
2358 IRExpr* next,
2359 IRJumpKind jumpkind,
2360 const VexGuestLayout* layout,
2361 IRType gWordTy, IRType hWordTy )
2363 if (!sg_clo_enable_sg_checks)
2364 return;
2366 if (jumpkind == Ijk_Call) {
2367 // Assumes x86 or amd64
2368 IRTemp sp_post_call_insn, fp_post_call_insn;
2369 XArray* frameBlocks;
2370 IRExpr** args;
2371 IRDirty* di;
2372 sp_post_call_insn
2373 = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
2374 fp_post_call_insn
2375 = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
2376 tl_assert(env->curr_IP_known);
2377 frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
2378 tl_assert(frameBlocks);
2379 if (VG_(sizeXA)(frameBlocks) == 0)
2380 frameBlocks = NULL;
2381 args
2382 = mkIRExprVec_5(
2383 IRExpr_RdTmp(sp_post_call_insn),
2384 IRExpr_RdTmp(fp_post_call_insn),
2385 /* assume the call doesn't change FP */
2386 next,
2387 mkIRExpr_HWord( (HWord)frameBlocks ),
2388 mkIRExpr_HWord( sizeofIRType(gWordTy) )
2390 di = unsafeIRDirty_0_N(
2391 3/*regparms*/,
2392 "helperc__new_frame",
2393 VG_(fnptr_to_fnentry)( &helperc__new_frame ),
2394 args );
2395 addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
2400 //////////////////////////////////////////////////////////////
2401 // //
2402 // end Instrumentation //
2403 // //
2404 //////////////////////////////////////////////////////////////
2407 //////////////////////////////////////////////////////////////
2408 // //
2409 // misc //
2410 // //
2411 //////////////////////////////////////////////////////////////
2413 /* Make a new empty stack frame that is suitable for being the
2414 outermost frame in a stack. It has a creation_sp of effectively
2415 infinity, so it can never be removed. */
2416 static StackFrame* new_root_StackFrame ( void )
2418 StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
2419 VG_(memset)( sframe, 0, sizeof(*sframe) );
2420 sframe->creation_sp = ~0UL;
2422 /* This sets up .htab, .htab_size and .htab_used */
2423 initialise_II_hash_table( sframe );
2425 /* ->depth, ->outer, ->inner are 0, NULL, NULL */
2427 return sframe;
2430 /* Primary routine for setting up the shadow stack for a new thread.
2431 Note that this is used to create not only child thread stacks, but
2432 the root thread's stack too. We create a new stack with
2433 .creation_sp set to infinity, so that the outermost frame can never
2434 be removed (by shadowStack_unwind). The core calls this function
2435 as soon as a thread is created. We cannot yet get its SP value,
2436 since that may not yet be set. */
2437 static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
2439 tl_assert(is_sane_TId(child));
2440 if (parent == VG_INVALID_THREADID) {
2441 /* creating the main thread's stack */
2442 } else {
2443 tl_assert(is_sane_TId(parent));
2444 tl_assert(parent != child);
2445 tl_assert(shadowStacks[parent] != NULL);
2446 tl_assert(siTrees[parent] != NULL);
2449 /* Create the child's stack. Bear in mind we may be re-using
2450 it. */
2451 if (shadowStacks[child] == NULL) {
2452 /* First use of this stack. Just allocate an initial frame. */
2453 tl_assert(siTrees[child] == NULL);
2454 } else {
2455 StackFrame *frame, *frame2;
2456 /* re-using a stack. */
2457 /* get rid of the interval tree */
2458 tl_assert(siTrees[child] != NULL);
2459 delete_StackTree( siTrees[child] );
2460 siTrees[child] = NULL;
2461 /* Throw away all existing frames. */
2462 frame = shadowStacks[child];
2463 while (frame->outer)
2464 frame = frame->outer;
2465 tl_assert(frame->depth == 0);
2466 while (frame) {
2467 frame2 = frame->inner;
2468 if (frame2) tl_assert(1 + frame->depth == frame2->depth);
2469 sg_free(frame);
2470 frame = frame2;
2472 shadowStacks[child] = NULL;
2475 tl_assert(shadowStacks[child] == NULL);
2476 tl_assert(siTrees[child] == NULL);
2478 /* Set up the initial stack frame. */
2479 shadowStacks[child] = new_root_StackFrame();
2481 /* and set up the child's stack block interval tree. */
2482 siTrees[child] = new_StackTree();
2485 /* Once a thread is ready to go, the core calls here. We take the
2486 opportunity to push a second frame on its stack, with the
2487 presumably valid SP value that is going to be used for the thread's
2488 startup. Hence we should always wind up with a valid outermost
2489 frame for the thread. */
2490 static void shadowStack_set_initial_SP ( ThreadId tid )
2492 StackFrame* sf;
2493 tl_assert(is_sane_TId(tid));
2494 sf = shadowStacks[tid];
2495 tl_assert(sf != NULL);
2496 tl_assert(sf->outer == NULL);
2497 tl_assert(sf->inner == NULL);
2498 tl_assert(sf->creation_sp == ~0UL);
2499 shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
2500 0, VG_(get_IP)(tid), NULL );
2504 //////////////////////////////////////////////////////////////
2505 // //
2506 // main-ish //
2507 // //
2508 //////////////////////////////////////////////////////////////
2510 /* CALLED indirectly FROM GENERATED CODE. Calls here are created by
2511 sp-change analysis, as requested in pc_pre_clo_int(). */
2512 void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
2513 ThreadId tid = VG_(get_running_tid)();
2514 shadowStack_unwind( tid, old_SP+len );
2517 void sg_pre_clo_init ( void ) {
2518 ourGlobals_init();
2519 init_StackBlocks_set();
2520 init_GlobalBlock_set();
2523 void sg_post_clo_init ( void ) {
2526 void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
2527 shadowStack_thread_create(parent, child);
2530 void sg_pre_thread_first_insn ( ThreadId tid ) {
2531 shadowStack_set_initial_SP(tid);
2534 void sg_fini(Int exitcode)
2536 if (VG_(clo_stats)) {
2537 VG_(message)(Vg_DebugMsg,
2538 " sg_: %'llu total accesses, of which:\n", stats__total_accesses);
2539 VG_(message)(Vg_DebugMsg,
2540 " sg_: stack0: %'12llu classify\n",
2541 stats__classify_Stack0);
2542 VG_(message)(Vg_DebugMsg,
2543 " sg_: stackN: %'12llu classify\n",
2544 stats__classify_StackN);
2545 VG_(message)(Vg_DebugMsg,
2546 " sg_: global: %'12llu classify\n",
2547 stats__classify_Global);
2548 VG_(message)(Vg_DebugMsg,
2549 " sg_: unknown: %'12llu classify\n",
2550 stats__classify_Unknown);
2551 VG_(message)(Vg_DebugMsg,
2552 " sg_: %'llu Invars preened, of which %'llu changed\n",
2553 stats__Invars_preened, stats__Invars_changed);
2554 VG_(message)(Vg_DebugMsg,
2555 " sg_: t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
2556 VG_(message)(Vg_DebugMsg,
2557 " sg_: qcache: %'llu searches, %'llu probes, %'llu misses\n",
2558 stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
2559 VG_(message)(Vg_DebugMsg,
2560 " sg_: htab-fast: %'llu hits\n",
2561 stats__htab_fast);
2562 VG_(message)(Vg_DebugMsg,
2563 " sg_: htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
2564 stats__htab_searches, stats__htab_probes, stats__htab_resizes);
2569 /*--------------------------------------------------------------------*/
2570 /*--- end sg_main.c ---*/
2571 /*--------------------------------------------------------------------*/