Enable expensive handling of CmpEQ64/CmpNE64 for amd64 by default.
[valgrind.git] / dhat / dh_main.c
blobe13d8b8e3b63fcf3e2dcbc0e49e67f401e96b112
2 //--------------------------------------------------------------------//
3 //--- DHAT: a Dynamic Heap Analysis Tool dh_main.c ---//
4 //--------------------------------------------------------------------//
6 /*
7 This file is part of DHAT, a Valgrind tool for profiling the
8 heap usage of programs.
10 Copyright (C) 2010-2018 Mozilla Foundation
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 /* Contributed by Julian Seward <jseward@acm.org> */
31 #include "pub_tool_basics.h"
32 #include "pub_tool_clientstate.h"
33 #include "pub_tool_libcbase.h"
34 #include "pub_tool_libcassert.h"
35 #include "pub_tool_libcfile.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_libcproc.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_options.h"
41 #include "pub_tool_replacemalloc.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
// Blocks whose requested size exceeds this limit get no per-byte access
// histogram, bounding DHAT's per-block metadata overhead.
#define HISTOGRAM_SIZE_LIMIT 1024
47 //------------------------------------------------------------//
48 //--- Globals ---//
49 //------------------------------------------------------------//
// Values for the entire run.
static ULong g_total_blocks = 0;   // blocks ever allocated
static ULong g_total_bytes  = 0;   // bytes ever allocated

// Current values.
static ULong g_curr_blocks = 0;
static ULong g_curr_bytes  = 0;
static ULong g_curr_instrs = 0;  // incremented from generated code

// Values at the global max, i.e. when g_curr_bytes peaks.
static ULong g_max_blocks = 0;
static ULong g_max_bytes  = 0;
static ULong g_max_instrs = 0;

// Values for the entire run.  Computed at the end.
static ULong g_reads_bytes  = 0;
static ULong g_writes_bytes = 0;
69 //------------------------------------------------------------//
70 //--- an Interval Tree of live blocks ---//
71 //------------------------------------------------------------//
73 /* Tracks information about live blocks. */
/* Tracks information about live blocks. */
typedef
   struct {
      Addr        payload;        // start address of the client block
      SizeT       req_szB;        // client-requested size; always > 0
      ExeContext* ap;             /* allocation ec */
      ULong       allocd_at;      /* instruction number */
      ULong       reads_bytes;    // bytes read from this block so far
      ULong       writes_bytes;   // bytes written to this block so far
      /* Approx histogram, one byte per payload byte.  Counts latch up
         therefore at 0xFFFF.  Can be NULL if the block is resized or if
         the block is larger than HISTOGRAM_SIZE_LIMIT. */
      UShort*     histoW; /* [0 .. req_szB-1] */
   }
   Block;

/* May not contain zero-sized blocks.  May not contain
   overlapping blocks. */
static WordFM* interval_tree = NULL;  /* WordFM* Block* void */
93 /* Here's the comparison function. Since the tree is required
94 to contain non-zero sized, non-overlapping blocks, it's good
95 enough to consider any overlap as a match. */
96 static Word interval_tree_Cmp ( UWord k1, UWord k2 )
98 Block* b1 = (Block*)k1;
99 Block* b2 = (Block*)k2;
100 tl_assert(b1->req_szB > 0);
101 tl_assert(b2->req_szB > 0);
102 if (b1->payload + b1->req_szB <= b2->payload) return -1;
103 if (b2->payload + b2->req_szB <= b1->payload) return 1;
104 return 0;
// 2-entry cache for find_Block_containing
static Block* fbc_cache0 = NULL;   // most recently used block
static Block* fbc_cache1 = NULL;   // second most recently used block

// Cache statistics: hits, misses that hit the interval tree, and
// lookups that found no containing block at all.
static UWord stats__n_fBc_cached   = 0;
static UWord stats__n_fBc_uncached = 0;
static UWord stats__n_fBc_notfound = 0;
/* Return the live Block containing address 'a', or NULL if none.
   Checks the 2-entry MRU cache first; on a miss, probes the interval
   tree with a fake 1-byte block keyed at 'a' (any overlap counts as a
   match -- see interval_tree_Cmp), then promotes the result into the
   cache. */
static Block* find_Block_containing ( Addr a )
{
   if (LIKELY(fbc_cache0
              && fbc_cache0->payload <= a
              && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
      // found at 0
      stats__n_fBc_cached++;
      return fbc_cache0;
   }
   if (LIKELY(fbc_cache1
              && fbc_cache1->payload <= a
              && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
      // found at 1; swap 0 and 1
      Block* tmp = fbc_cache0;
      fbc_cache0 = fbc_cache1;
      fbc_cache1 = tmp;
      stats__n_fBc_cached++;
      return fbc_cache0;
   }
   // Cache miss: probe the interval tree with a minimal fake block.
   Block fake;
   fake.payload = a;
   fake.req_szB = 1;
   UWord foundkey = 1;
   UWord foundval = 1;
   Bool found = VG_(lookupFM)( interval_tree,
                               &foundkey, &foundval, (UWord)&fake );
   if (!found) {
      stats__n_fBc_notfound++;
      return NULL;
   }
   tl_assert(foundval == 0); // we don't store vals in the interval tree
   tl_assert(foundkey != 1);
   Block* res = (Block*)foundkey;
   tl_assert(res != &fake);
   // put at the top position
   fbc_cache1 = fbc_cache0;
   fbc_cache0 = res;
   stats__n_fBc_uncached++;
   return res;
}
156 // delete a block; asserts if not found. (viz, 'a' must be
157 // known to be present.)
158 static void delete_Block_starting_at ( Addr a )
160 Block fake;
161 fake.payload = a;
162 fake.req_szB = 1;
163 Bool found = VG_(delFromFM)( interval_tree,
164 NULL, NULL, (Addr)&fake );
165 tl_assert(found);
166 fbc_cache0 = fbc_cache1 = NULL;
//------------------------------------------------------------//
//--- a FM of allocation points (APs)                      ---//
//------------------------------------------------------------//

typedef
   struct {
      // The allocation point that we're summarising stats for.
      ExeContext* ap;

      // Total number of blocks and bytes allocated by this AP.
      ULong total_blocks;
      ULong total_bytes;

      // The current number of blocks and bytes live for this AP.
      ULong curr_blocks;
      ULong curr_bytes;

      // Values at the AP max, i.e. when this AP's curr_bytes peaks.
      ULong max_blocks;   // Blocks at the AP max.
      ULong max_bytes;    // The AP max, measured in bytes.

      // Values at the global max.
      ULong at_tgmax_blocks;
      ULong at_tgmax_bytes;

      // Total lifetimes of all blocks allocated by this AP.  Includes blocks
      // explicitly freed and blocks implicitly freed at termination.
      ULong total_lifetimes_instrs;

      // Number of blocks freed by this AP.  (Only used in assertions.)
      ULong freed_blocks;

      // Total number of reads and writes in all blocks allocated
      // by this AP.
      ULong reads_bytes;
      ULong writes_bytes;

      /* Histogram information.  We maintain a histogram aggregated for
         all retiring Blocks allocated by this AP, but only if:
         - this AP has only ever allocated objects of one size
         - that size is <= HISTOGRAM_SIZE_LIMIT
         What we need therefore is a mechanism to see if this AP
         has only ever allocated blocks of one size.

         3 states:
            Unknown          because no retirement yet
            Exactly xsize    all retiring blocks are of this size
            Mixed            multiple different sizes seen
      */
      enum { Unknown=999, Exactly, Mixed } xsize_tag;
      SizeT xsize;
      UInt* histo; /* [0 .. xsize-1] */
   }
   APInfo;

/* maps ExeContext*'s to APInfo*'s.  Note that the keys must match the
   .ap field in the values. */
static WordFM* apinfo = NULL;  /* WordFM* ExeContext* APInfo* */
230 // Are we at peak memory? If so, update at_tgmax_blocks and at_tgmax_bytes in
231 // all APInfos. Note that this is moderately expensive so we avoid calling it
232 // on every allocation.
233 static void check_for_peak(void)
235 if (g_curr_bytes == g_max_bytes) {
236 // It's a peak. (If there are multiple equal peaks we record the latest
237 // one.)
238 UWord keyW, valW;
239 VG_(initIterFM)(apinfo);
240 while (VG_(nextIterFM)(apinfo, &keyW, &valW)) {
241 APInfo* api = (APInfo*)valW;
242 tl_assert(api && api->ap == (ExeContext*)keyW);
243 api->at_tgmax_blocks = api->curr_blocks;
244 api->at_tgmax_bytes = api->curr_bytes;
246 VG_(doneIterFM)(apinfo);
/* 'bk' is being introduced (has just been allocated).  Find the
   relevant APInfo entry for it, or create one, based on the block's
   allocation EC.  Then, update the APInfo to the extent that we
   actually can, to reflect the allocation. */
static void intro_Block ( Block* bk )
{
   tl_assert(bk);
   tl_assert(bk->ap);

   APInfo* api   = NULL;
   UWord   keyW  = 0;
   UWord   valW  = 0;
   Bool    found = VG_(lookupFM)( apinfo,
                                  &keyW, &valW, (UWord)bk->ap );
   if (found) {
      api = (APInfo*)valW;
      tl_assert(keyW == (UWord)bk->ap);
   } else {
      // First allocation from this AP: create a zeroed APInfo for it.
      api = VG_(malloc)( "dh.intro_Block.1", sizeof(APInfo) );
      VG_(memset)(api, 0, sizeof(*api));
      api->ap = bk->ap;
      Bool present = VG_(addToFM)( apinfo,
                                   (UWord)bk->ap, (UWord)api );
      tl_assert(!present);
      // histo stuff: no retirement seen yet, so size class is Unknown.
      tl_assert(api->freed_blocks == 0);
      api->xsize_tag = Unknown;
      api->xsize = 0;
      if (0) VG_(printf)("api %p --> Unknown\n", api);
   }

   tl_assert(api->ap == bk->ap);

   // Update global stats first.

   g_total_blocks++;
   g_total_bytes += bk->req_szB;

   g_curr_blocks++;
   g_curr_bytes += bk->req_szB;
   if (g_curr_bytes > g_max_bytes) {
      // New global peak: remember the state at this moment.
      g_max_blocks = g_curr_blocks;
      g_max_bytes  = g_curr_bytes;
      g_max_instrs = g_curr_instrs;
   }

   // Now update APInfo stats.

   api->total_blocks++;
   api->total_bytes += bk->req_szB;

   api->curr_blocks++;
   api->curr_bytes += bk->req_szB;
   if (api->curr_bytes > api->max_bytes) {
      api->max_blocks = api->curr_blocks;
      api->max_bytes  = api->curr_bytes;
   }
}
/* 'bk' is retiring (being freed).  Find the relevant APInfo entry for
   it, which must already exist.  Then, fold info from 'bk' into that
   entry.  'because_freed' is True if the block is retiring because
   the client has freed it.  If it is False then the block is retiring
   because the program has finished, in which case we want to skip the
   updates of the total blocks live etc for this AP, but still fold in
   the access counts and histo data that have so far accumulated for
   the block. */
static void retire_Block ( Block* bk, Bool because_freed )
{
   tl_assert(bk);
   tl_assert(bk->ap);

   APInfo* api   = NULL;
   UWord   keyW  = 0;
   UWord   valW  = 0;
   Bool    found = VG_(lookupFM)( apinfo,
                                  &keyW, &valW, (UWord)bk->ap );

   tl_assert(found);
   api = (APInfo*)valW;
   tl_assert(api->ap == bk->ap);

   // update stats following this free.
   if (0)
      VG_(printf)("ec %p  api->c_by_l %llu  bk->rszB %llu\n",
                  bk->ap, api->curr_bytes, (ULong)bk->req_szB);

   if (because_freed) {
      // Total bytes is coming down from a possible peak.
      check_for_peak();

      // Then update global stats.
      tl_assert(g_curr_blocks >= 1);
      tl_assert(g_curr_bytes >= bk->req_szB);
      g_curr_blocks--;
      g_curr_bytes -= bk->req_szB;

      // Then update APInfo stats.
      tl_assert(api->curr_blocks >= 1);
      tl_assert(api->curr_bytes >= bk->req_szB);
      api->curr_blocks--;
      api->curr_bytes -= bk->req_szB;

      api->freed_blocks++;
   }

   tl_assert(bk->allocd_at <= g_curr_instrs);
   api->total_lifetimes_instrs += (g_curr_instrs - bk->allocd_at);

   // access counts
   api->reads_bytes  += bk->reads_bytes;
   api->writes_bytes += bk->writes_bytes;
   g_reads_bytes  += bk->reads_bytes;
   g_writes_bytes += bk->writes_bytes;

   // histo stuff.  First, do state transitions for xsize/xsize_tag.
   switch (api->xsize_tag) {

      case Unknown:
         // First retirement from this AP: fix the size class to this
         // block's size and (if the block had one) start an aggregate
         // histogram.
         tl_assert(api->xsize == 0);
         tl_assert(api->freed_blocks == 1 || api->freed_blocks == 0);
         tl_assert(!api->histo);
         api->xsize_tag = Exactly;
         api->xsize = bk->req_szB;
         if (0) VG_(printf)("api %p --> Exactly(%lu)\n", api, api->xsize);
         // and allocate the histo
         if (bk->histoW) {
            api->histo = VG_(malloc)("dh.retire_Block.1",
                                     api->xsize * sizeof(UInt));
            VG_(memset)(api->histo, 0, api->xsize * sizeof(UInt));
         }
         break;

      case Exactly:
         //tl_assert(api->freed_blocks > 1);
         if (bk->req_szB != api->xsize) {
            // A different size retired from this AP: degrade to Mixed
            // and drop the aggregate histogram for good.
            if (0) VG_(printf)("api %p --> Mixed(%lu -> %lu)\n",
                               api, api->xsize, bk->req_szB);
            api->xsize_tag = Mixed;
            api->xsize = 0;
            // deallocate the histo, if any
            if (api->histo) {
               VG_(free)(api->histo);
               api->histo = NULL;
            }
         }
         break;

      case Mixed:
         //tl_assert(api->freed_blocks > 1);
         break;

      default:
         tl_assert(0);
   }

   // See if we can fold the histo data from this block into
   // the data for the AP
   if (api->xsize_tag == Exactly && api->histo && bk->histoW) {
      tl_assert(api->xsize == bk->req_szB);
      UWord i;
      for (i = 0; i < api->xsize; i++) {
         // FIXME: do something better in case of overflow of api->histo[..]
         // Right now, at least don't let it overflow/wrap around
         if (api->histo[i] <= 0xFFFE0000)
            api->histo[i] += (UInt)bk->histoW[i];
      }
      if (0) VG_(printf)("fold in, AP = %p\n", api);
   }

#if 0
   if (bk->histoB) {
      VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
      UWord i;
      for (i = 0; i < bk->req_szB; i++)
         VG_(printf)("%u ", (UInt)bk->histoB[i]);
      VG_(printf)("\n");
   } else {
      VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
   }
#endif
}
/* This handles block resizing.  When a block with AP 'ec' has a
   size change of 'delta', call here to update the APInfo. */
static void resize_Block(ExeContext* ec, SizeT old_req_szB, SizeT new_req_szB)
{
   Long    delta = (Long)new_req_szB - (Long)old_req_szB;
   APInfo* api   = NULL;
   UWord   keyW  = 0;
   UWord   valW  = 0;
   Bool    found = VG_(lookupFM)( apinfo,
                                  &keyW, &valW, (UWord)ec );

   tl_assert(found);
   api = (APInfo*)valW;
   tl_assert(api->ap == ec);

   if (delta < 0) {
      // Shrinking: the live-byte counters must not go negative.
      tl_assert(api->curr_bytes >= -delta);
      tl_assert(g_curr_bytes >= -delta);
   }

   // Total bytes might be coming down from a possible peak.
   if (delta < 0)
      check_for_peak();

   // Note: we treat realloc() like malloc() + free() for total counts, i.e. we
   // increment total_blocks by 1 and increment total_bytes by new_req_szB.
   //
   // A reasonable alternative would be to leave total_blocks unchanged and
   // increment total_bytes by delta (but only if delta is positive).  But then
   // calls to realloc wouldn't be counted towards the total_blocks count,
   // which is undesirable.

   // Update global stats first.

   g_total_blocks++;
   g_total_bytes += new_req_szB;

   g_curr_blocks += 0;  // unchanged
   g_curr_bytes  += delta;
   if (g_curr_bytes > g_max_bytes) {
      g_max_blocks = g_curr_blocks;
      g_max_bytes  = g_curr_bytes;
      g_max_instrs = g_curr_instrs;
   }

   // Now update APInfo stats.

   api->total_blocks++;
   api->total_bytes += new_req_szB;

   api->curr_blocks += 0;  // unchanged
   api->curr_bytes  += delta;
   if (api->curr_bytes > api->max_bytes) {
      api->max_blocks = api->curr_blocks;
      api->max_bytes  = api->curr_bytes;
   }
}
492 //------------------------------------------------------------//
493 //--- update both Block and APInfos after {m,re}alloc/free ---//
494 //------------------------------------------------------------//
/* Common allocation path for malloc/new/calloc/memalign.  Allocates
   'req_szB' bytes on the client heap with alignment 'req_alignB',
   zeroing if requested, records the new Block in the interval tree,
   and folds the allocation into the per-AP stats.  'p' must be NULL
   (custom allocators are not handled).  Returns the payload pointer,
   or NULL on failure. */
static
void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
                  Bool is_zeroed )
{
   tl_assert(p == NULL); // don't handle custom allocators right now
   SizeT actual_szB /*, slop_szB*/;

   // Reject absurd sizes (negative when viewed as signed).
   if ((SSizeT)req_szB < 0) return NULL;

   if (req_szB == 0)
      req_szB = 1;  /* can't allow zero-sized blocks in the interval tree */

   // Allocate and zero if necessary
   if (!p) {
      p = VG_(cli_malloc)( req_alignB, req_szB );
      if (!p) {
         return NULL;
      }
      if (is_zeroed) VG_(memset)(p, 0, req_szB);
      actual_szB = VG_(cli_malloc_usable_size)(p);
      tl_assert(actual_szB >= req_szB);
      /* slop_szB = actual_szB - req_szB; */
   } else {
      /* slop_szB = 0; */
   }

   // Make new Block, add to interval_tree.
   Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
   bk->payload      = (Addr)p;
   bk->req_szB      = req_szB;
   bk->ap           = VG_(record_ExeContext)(tid, 0/*first word delta*/);
   bk->allocd_at    = g_curr_instrs;
   bk->reads_bytes  = 0;
   bk->writes_bytes = 0;
   // set up histogram array, if the block isn't too large
   bk->histoW = NULL;
   if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
      bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
      VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
   }

   Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
   tl_assert(!present);
   // The find cache may hold stale pointers relative to the new tree state.
   fbc_cache0 = fbc_cache1 = NULL;

   intro_Block(bk);

   if (0) VG_(printf)("ALLOC %lu -> %p\n", req_szB, p);

   return p;
}
548 static
549 void die_block ( void* p, Bool custom_free )
551 tl_assert(!custom_free); // at least for now
553 Block* bk = find_Block_containing( (Addr)p );
555 if (!bk) {
556 return; // bogus free
559 tl_assert(bk->req_szB > 0);
560 // assert the block finder is behaving sanely
561 tl_assert(bk->payload <= (Addr)p);
562 tl_assert( (Addr)p < bk->payload + bk->req_szB );
564 if (bk->payload != (Addr)p) {
565 return; // bogus free
568 if (0) VG_(printf)(" FREE %p %llu\n",
569 p, g_curr_instrs - bk->allocd_at);
571 retire_Block(bk, True/*because_freed*/);
573 VG_(cli_free)( (void*)bk->payload );
574 delete_Block_starting_at( bk->payload );
575 if (bk->histoW) {
576 VG_(free)( bk->histoW );
577 bk->histoW = NULL;
579 VG_(free)( bk );
/* Common realloc path.  Finds the existing block for 'p_old', discards
   its histogram (resizing invalidates it), then either shrinks in
   place or allocates a bigger block, copies the contents, frees the
   old one and re-inserts the moved Block into the interval tree.
   Returns the (possibly new) payload, or NULL on bogus input or
   allocation failure. */
static
void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
{
   if (0) VG_(printf)("REALL %p %lu\n", p_old, new_req_szB);
   void* p_new = NULL;

   tl_assert(new_req_szB > 0); // map 0 to 1

   // Find the old block.
   Block* bk = find_Block_containing( (Addr)p_old );
   if (!bk) {
      return NULL;   // bogus realloc
   }

   tl_assert(bk->req_szB > 0);
   // assert the block finder is behaving sanely
   tl_assert(bk->payload <= (Addr)p_old);
   tl_assert( (Addr)p_old < bk->payload + bk->req_szB );

   if (bk->payload != (Addr)p_old) {
      return NULL;   // bogus realloc: interior pointer
   }

   // Keeping the histogram alive in any meaningful way across
   // block resizing is too darn complicated.  Just throw it away.
   if (bk->histoW) {
      VG_(free)(bk->histoW);
      bk->histoW = NULL;
   }

   // Actually do the allocation, if necessary.
   if (new_req_szB <= bk->req_szB) {

      // New size is smaller or same; block not moved.
      resize_Block(bk->ap, bk->req_szB, new_req_szB);
      bk->req_szB = new_req_szB;
      return p_old;

   } else {

      // New size is bigger; make new block, copy shared contents, free old.
      p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
      if (!p_new) {
         // Nb: if realloc fails, NULL is returned but the old block is not
         // touched.  What an awful function.
         return NULL;
      }
      tl_assert(p_new != p_old);

      VG_(memcpy)(p_new, p_old, bk->req_szB);
      VG_(cli_free)(p_old);

      // Since the block has moved, we need to re-insert it into the
      // interval tree at the new place.  Do this by removing
      // and re-adding it.
      delete_Block_starting_at( (Addr)p_old );
      // now 'bk' is no longer in the tree, but the Block itself
      // is still alive

      // Update the metadata.
      resize_Block(bk->ap, bk->req_szB, new_req_szB);
      bk->payload = (Addr)p_new;
      bk->req_szB = new_req_szB;

      // and re-add
      Bool present
         = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
      tl_assert(!present);
      fbc_cache0 = fbc_cache1 = NULL;

      return p_new;
   }
   /*NOTREACHED*/
   tl_assert(0);
}
660 //------------------------------------------------------------//
661 //--- malloc() et al replacement wrappers ---//
662 //------------------------------------------------------------//
664 static void* dh_malloc ( ThreadId tid, SizeT szB )
666 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
669 static void* dh___builtin_new ( ThreadId tid, SizeT szB )
671 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
674 static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
676 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
679 static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
681 return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
684 static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
686 return new_block( tid, NULL, szB, alignB, False );
689 static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
691 die_block( p, /*custom_free*/False );
694 static void dh___builtin_delete ( ThreadId tid, void* p )
696 die_block( p, /*custom_free*/False);
699 static void dh___builtin_vec_delete ( ThreadId tid, void* p )
701 die_block( p, /*custom_free*/False );
704 static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
706 if (p_old == NULL) {
707 return dh_malloc(tid, new_szB);
709 if (new_szB == 0) {
710 dh_free(tid, p_old);
711 return NULL;
713 return renew_block(tid, p_old, new_szB);
716 static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
718 Block* bk = find_Block_containing( (Addr)p );
719 return bk ? bk->req_szB : 0;
723 //------------------------------------------------------------//
724 //--- memory references ---//
725 //------------------------------------------------------------//
727 static
728 void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
730 UWord i, offMin, offMax1;
731 offMin = addr - bk->payload;
732 tl_assert(offMin < bk->req_szB);
733 offMax1 = offMin + szB;
734 if (offMax1 > bk->req_szB)
735 offMax1 = bk->req_szB;
736 //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB);
737 for (i = offMin; i < offMax1; i++) {
738 UShort n = bk->histoW[i];
739 if (n < 0xFFFF) n++;
740 bk->histoW[i] = n;
744 static VG_REGPARM(2)
745 void dh_handle_write ( Addr addr, UWord szB )
747 Block* bk = find_Block_containing(addr);
748 if (bk) {
749 bk->writes_bytes += szB;
750 if (bk->histoW)
751 inc_histo_for_block(bk, addr, szB);
755 static VG_REGPARM(2)
756 void dh_handle_read ( Addr addr, UWord szB )
758 Block* bk = find_Block_containing(addr);
759 if (bk) {
760 bk->reads_bytes += szB;
761 if (bk->histoW)
762 inc_histo_for_block(bk, addr, szB);
767 // Handle reads and writes by syscalls (read == kernel
768 // reads user space, write == kernel writes user space).
769 // Assumes no such read or write spans a heap block
770 // boundary and so we can treat it just as one giant
771 // read or write.
772 static
773 void dh_handle_noninsn_read ( CorePart part, ThreadId tid, const HChar* s,
774 Addr base, SizeT size )
776 switch (part) {
777 case Vg_CoreSysCall:
778 dh_handle_read(base, size);
779 break;
780 case Vg_CoreSysCallArgInMem:
781 break;
782 case Vg_CoreTranslate:
783 break;
784 default:
785 tl_assert(0);
789 static
790 void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
791 Addr base, SizeT size )
793 switch (part) {
794 case Vg_CoreSysCall:
795 case Vg_CoreClientReq:
796 dh_handle_write(base, size);
797 break;
798 case Vg_CoreSignal:
799 break;
800 default:
801 tl_assert(0);
806 //------------------------------------------------------------//
807 //--- Instrumentation ---//
808 //------------------------------------------------------------//
// Shorthands for building VEX IR; #undef'd again after the
// instrumentation code below.
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
#define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
#define assign(_t, _e) IRStmt_WrTmp((_t), (_e))
/* Append IR to 'sbOut' that adds 'n' to the global instruction
   counter g_curr_instrs, as a load/add/store sequence in guest
   endianness. */
static
void add_counter_update(IRSB* sbOut, Int n)
{
   #if defined(VG_BIGENDIAN)
   # define END Iend_BE
   #elif defined(VG_LITTLEENDIAN)
   # define END Iend_LE
   #else
   # error "Unknown endianness"
   #endif
   // Add code to increment 'g_curr_instrs' by 'n', like this:
   //   WrTmp(t1, Load64(&g_curr_instrs))
   //   WrTmp(t2, Add64(RdTmp(t1), Const(n)))
   //   Store(&g_curr_instrs, t2)
   IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
   IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
   IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_curr_instrs );

   IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
   IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
   IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));

   addStmtToIRSB( sbOut, st1 );
   addStmtToIRSB( sbOut, st2 );
   addStmtToIRSB( sbOut, st3 );
}
/* Append IR to 'sbOut' that calls dh_handle_read/dh_handle_write for a
   memory access of 'szB' bytes at 'addr'.  The call is guarded so that
   accesses close to the stack pointer (within THRESH above SP minus the
   redzone) are skipped -- they cannot be heap accesses, and skipping
   them avoids a very large number of helper calls.  'goff_sp' is the
   guest-state offset of the stack pointer. */
static
void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
                 Int goff_sp)
{
   IRType       tyAddr = Ity_INVALID;
   const HChar* hName  = NULL;
   void*        hAddr  = NULL;
   IRExpr**     argv   = NULL;
   IRDirty*     di     = NULL;

   const Int THRESH = 4096 * 4; // somewhat arbitrary
   const Int rz_szB = VG_STACK_REDZONE_SZB;

   tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

   if (isWrite) {
      hName = "dh_handle_write";
      hAddr = &dh_handle_write;
   } else {
      hName = "dh_handle_read";
      hAddr = &dh_handle_read;
   }

   argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );

   /* Add the helper. */
   tl_assert(hName);
   tl_assert(hAddr);
   tl_assert(argv);
   di = unsafeIRDirty_0_N( 2/*regparms*/,
                           hName, VG_(fnptr_to_fnentry)( hAddr ),
                           argv );

   /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
      some arbitrary N.  If that fails then addr is in the range (SP -
      RZ .. SP + N - RZ).  If N is smallish (a page?) then we can say
      addr is within a page of SP and so can't possibly be a heap
      access, and so can be skipped. */
   IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));

   IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB(
      sbOut,
      assign(sp_minus_rz,
             tyAddr == Ity_I32
                ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
                : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
   );

   IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB(
      sbOut,
      assign(diff,
             tyAddr == Ity_I32
                ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
                : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
   );

   IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
   addStmtToIRSB(
      sbOut,
      assign(guard,
             tyAddr == Ity_I32
                ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
                : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
   );
   di->guard = mkexpr(guard);

   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
}
/* Core instrumentation pass: copy 'sbIn' into a new superblock,
   inserting (a) instruction-count updates before every Ist_Exit and at
   the superblock end, and (b) read/write helper calls for every memory
   access (loads, stores, dirty helpers, CAS, LL/SC). */
static
IRSB* dh_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      const VexGuestLayout* layout,
                      const VexGuestExtents* vge,
                      const VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int        i, n = 0;   // n counts IMarks not yet flushed into the counter
   IRSB*      sbOut;
   IRTypeEnv* tyenv = sbIn->tyenv;

   const Int goff_sp = layout->offset_SP;

   // We increment the instruction count in two places:
   // - just before any Ist_Exit statements;
   // - just before the IRSB's end.
   // In the former case, we zero 'n' and then continue instrumenting.

   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];

      if (!st || st->tag == Ist_NoOp) continue;

      switch (st->tag) {

         case Ist_IMark: {
            // One guest instruction completed; batch it into 'n'.
            n++;
            break;
         }

         case Ist_Exit: {
            if (n > 0) {
               // Add an increment before the Exit statement, then reset 'n'.
               add_counter_update(sbOut, n);
               n = 0;
            }
            break;
         }

         case Ist_WrTmp: {
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(data->Iex.Load.ty),
                            aexpr, goff_sp );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data  = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addMemEvent( sbOut, True/*isWrite*/,
                         sizeofIRType(typeOfIRExpr(tyenv, data)),
                         aexpr, goff_sp );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, False/*!isWrite*/,
                               dataSize, d->mAddr, goff_sp );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, True/*isWrite*/,
                               dataSize, d->mAddr, goff_sp );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            addMemEvent( sbOut, False/*!isWrite*/,
                         dataSize, cas->addr, goff_sp );
            addMemEvent( sbOut, True/*isWrite*/,
                         dataSize, cas->addr, goff_sp );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addMemEvent( sbOut, True/*isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            }
            break;
         }

         default:
            break;
      }

      // Always pass the original statement through.
      addStmtToIRSB( sbOut, st );
   }

   if (n > 0) {
      // Add an increment before the SB end.
      add_counter_update(sbOut, n);
   }
   return sbOut;
}
// The IR-building shorthands were only for the instrumentation code
// above; remove them so they can't leak into later code.
#undef binop
#undef mkexpr
#undef mkU32
#undef mkU64
#undef assign
1071 //------------------------------------------------------------//
1072 //--- Command line args ---//
1073 //------------------------------------------------------------//
// Output file name; a "%p" in it expands to the client's PID.
static const HChar* clo_dhat_out_file = "dhat.out.%p";

// Handle DHAT-specific command line options.  Anything unrecognised is
// forwarded to the common replacement-malloc option handler.  (Note:
// VG_STR_CLO is a macro expanding to a parenthesised condition, hence
// the unusual `if VG_STR_CLO(...) {}` spelling.)
static Bool dh_process_cmd_line_option(const HChar* arg)
{
   if VG_STR_CLO(arg, "--dhat-out-file", clo_dhat_out_file) {}

   else
      return VG_(replacement_malloc_process_cmd_line_option)(arg);

   return True;
}
// Print the DHAT-specific part of --help output.
static void dh_print_usage(void)
{
   VG_(printf)(
"    --dhat-out-file=<file>    output file name [dhat.out.%%p]\n"
   );
}
// Print the DHAT-specific part of --help-debug output (none exist).
static void dh_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
1102 //------------------------------------------------------------//
1103 //--- Finalisation ---//
1104 //------------------------------------------------------------//
1106 // File format notes.
1108 // - The files are JSON, because it's a widely-used format and saves us having
1109 // to write a parser in dh_view.js.
1111 // - We use a comma-first style for the generated JSON. Comma-first style
1112 // moves the special case for arrays/objects from the last item to the
1113 // first. This helps in cases where you can't easily tell in advance the
1114 // size of arrays/objects, such as iterating over a WordFM (because
1115 // VG_(sizeFM) is O(n) rather than O(1)), and iterating over stack frames
1116 // using VG_(apply_ExeContext) in combination with an InlIpCursor.
1118 // - We use short field names and minimal whitespace to minimize file sizes.
static VgFile* fp;   // the JSON output file, written at finalisation

// Shorthand for writing to the output file.
#define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })

// The frame table holds unique frames.
static WordFM* frame_tbl = NULL;    // maps frame string -> frame number
static UWord next_frame_n = 0;      // next unused frame number
1128 static Word frame_cmp(UWord a, UWord b)
1130 return VG_(strcmp)((const HChar*)a, (const HChar*)b);
1133 static HChar hex_digit_to_ascii_char(UChar d)
1135 d = d & 0xf;
1136 return (d < 10) ? ('0' + d) : ('a' + (d - 10));
// For JSON, we must escape double quote, backslash, and 0x00..0x1f.
//
// Returns the original string if no escaping was required. Returns a pointer
// to a static buffer if escaping was required. Therefore, the return value is
// only valid until the next call to this function.
static const HChar* json_escape(const HChar* s)
{
   // Reused across calls; grows monotonically, never freed.
   static HChar* buf = NULL;
   static SizeT bufcap = 0;

   // Do we need any escaping?  First pass: count how many extra output
   // characters escaping will add.
   SizeT extra = 0;
   const HChar* p = s;
   while (*p) {
      UChar c = *p;
      if (c == '"' || c == '\\') {
         extra += 1;               // becomes a 2-char sequence: \" or \\ .
      } else if (c <= 0x1f) {
         extra += 5;               // becomes a 6-char sequence: \u00XX.
      }
      p++;
   }
   SizeT len = p - s;              // p now points at the NUL, so this is strlen(s).

   if (extra == 0) {
      // No escaping needed; hand back the caller's string untouched.
      return s;
   }

   // Escaping needed. (The +1 is for the NUL terminator.) Enlarge buf if
   // necessary.
   SizeT newcap = len + extra + 1;
   if (bufcap < newcap) {
      buf = VG_(realloc)("dh.json", buf, newcap);
      bufcap = newcap;
   }

   // Second pass: copy, substituting escape sequences.
   p = s;
   HChar* q = buf;
   while (*p) {
      UChar c = *p;
      if (c == '"') {
         *q++ = '\\';
         *q++ = '"';
      } else if (c == '\\') {
         *q++ = '\\';
         *q++ = '\\';
      } else if (c <= 0x1f) {
         // Control character: emit \u00XX with lowercase hex digits.
         *q++ = '\\';
         *q++ = 'u';
         *q++ = '0';
         *q++ = '0';
         *q++ = hex_digit_to_ascii_char((c & 0x00f0) >> 4);
         *q++ = hex_digit_to_ascii_char(c & 0x000f);
      } else {
         *q++ = c;
      }
      p++;
   }
   *q = '\0';

   return buf;
}
// Callback for VG_(apply_ExeContext): emit one stack frame (and any frames
// inlined into it) as comma-separated frame-table indices.  `opaque` points
// to a Bool saying whether this is the first frame printed for the current
// "fs" array, i.e. whether to open it with '[' or continue with ','.
static void write_APInfo_frame(UInt n, DiEpoch ep, Addr ip, void* opaque)
{
   Bool* is_first = (Bool*)opaque;
   InlIPCursor* iipc = VG_(new_IIPC)(ep, ip);

   // One iteration per inlined frame at this IP.
   do {
      const HChar* buf = VG_(describe_IP)(ep, ip, iipc);

      // Skip entries in vg_replace_malloc.c (e.g. `malloc`, `calloc`,
      // `realloc`, `operator new`) because they're boring and clog up the
      // output.
      // Nb: `continue` jumps to the while-condition, so the inline cursor
      // still advances via VG_(next_IIPC).
      if (VG_(strstr)(buf, "vg_replace_malloc.c")) {
         continue;
      }

      // If this description has been seen before, get its number. Otherwise,
      // give it a new number and put it in the table.
      UWord keyW = 0, valW = 0;
      UWord frame_n = 0;
      Bool found = VG_(lookupFM)(frame_tbl, &keyW, &valW, (UWord)buf);
      if (found) {
         //const HChar* str = (const HChar*)keyW;
         //tl_assert(0 == VG_(strcmp)(buf, str));
         frame_n = valW;
      } else {
         // `buf` is a static buffer, we must copy it.  The copy is owned
         // (and eventually freed) by frame_tbl.
         const HChar* str = VG_(strdup)("dh.frame_tbl.3", buf);
         frame_n = next_frame_n++;
         Bool present = VG_(addToFM)(frame_tbl, (UWord)str, frame_n);
         tl_assert(!present);
      }

      FP("%c%lu", *is_first ? '[' : ',', frame_n);
      *is_first = False;

   } while (VG_(next_IIPC)(iipc));

   VG_(delete_IIPC)(iipc);
}
// Emit one allocation point as a JSON object (comma-first style; `is_first`
// decides whether to open the enclosing array with '[' or continue with ',').
// Writes totals, peaks, end-of-run figures, an optional run-length-encoded
// access histogram ("acc"), and the frame index list ("fs").
static void write_APInfo(APInfo* api, Bool is_first)
{
   // Lifetime totals can only grow, so they must dominate the peaks.
   tl_assert(api->total_blocks >= api->max_blocks);
   tl_assert(api->total_bytes >= api->max_bytes);

   FP(" %c{\"tb\":%llu,\"tbk\":%llu,\"tli\":%llu\n",
      is_first ? '[' : ',',
      api->total_bytes, api->total_blocks, api->total_lifetimes_instrs);
   FP("  ,\"mb\":%llu,\"mbk\":%llu\n",
      api->max_bytes, api->max_blocks);
   FP("  ,\"gb\":%llu,\"gbk\":%llu\n",
      api->at_tgmax_bytes, api->at_tgmax_blocks);
   FP("  ,\"fb\":%llu,\"fbk\":%llu\n",
      api->curr_bytes, api->curr_blocks);
   FP("  ,\"rb\":%llu,\"wb\":%llu\n",
      api->reads_bytes, api->writes_bytes);

   // Only blocks whose size never varied ("Exactly") have a per-byte
   // access histogram worth printing.
   if (api->histo && api->xsize_tag == Exactly) {
      FP("  ,\"acc\":[");

      // Simple run-length encoding: when N entries in a row have the same
      // value M, we print "-N,M". If there is just one in a row, we just
      // print "M". This reduces file size significantly.
      // Nb: reps==0 initially, so the very first "end of run" prints nothing.
      UShort repval = 0;
      Int reps = 0;
      for (UWord i = 0; i < api->xsize; i++) {
         UShort h = api->histo[i];
         if (repval == h) {
            // Continue current run.
            reps++;
         } else {
            // End of run; print it.
            if (reps == 1) {
               FP("%u,", repval);
            } else if (reps > 1) {
               FP("-%d,%u,", reps, repval);
            }
            reps = 1;
            repval = h;
         }
      }
      // Print the final run.
      if (reps == 1) {
         FP("%u", repval);
      } else if (reps > 1) {
         FP("-%d,%u", reps, repval);
      }

      FP("]\n");
   }

   // Frame list: write_APInfo_frame opens the '[' on the first frame;
   // we close it here.
   FP("  ,\"fs\":");
   Bool is_first_frame = True;
   VG_(apply_ExeContext)(write_APInfo_frame, &is_first_frame, api->ap);
   FP("]\n");

   FP(" }\n");
}
1302 static void write_APInfos(void)
1304 UWord keyW, valW;
1306 FP(",\"aps\":\n");
1308 VG_(initIterFM)(apinfo);
1309 Bool is_first = True;
1310 while (VG_(nextIterFM)(apinfo, &keyW, &valW)) {
1311 APInfo* api = (APInfo*)valW;
1312 tl_assert(api && api->ap == (ExeContext*)keyW);
1313 write_APInfo(api, is_first);
1314 is_first = False;
1316 VG_(doneIterFM)(apinfo);
1318 if (is_first) {
1319 // We didn't print any elements. This happens if apinfo is empty.
1320 FP(" [\n");
1323 FP(" ]\n");
// Tool finalisation: retire still-live blocks, optionally dump internal
// stats, then write the whole profile as JSON to --dhat-out-file and print
// brief totals to the user.  `exit_status` is unused.
static void dh_fini(Int exit_status)
{
   // This function does lots of allocations that it doesn't bother to free,
   // because execution is almost over anyway.

   // Total bytes might be at a possible peak.
   check_for_peak();

   // Before printing statistics, we must harvest various stats (such as
   // lifetimes and accesses) for all the blocks that are still alive.
   UWord keyW, valW;
   VG_(initIterFM)( interval_tree );
   while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
      Block* bk = (Block*)keyW;
      tl_assert(valW == 0);
      tl_assert(bk);
      retire_Block(bk, False/*!because_freed*/);
   }
   VG_(doneIterFM)( interval_tree );

   // Stats.
   if (VG_(clo_stats)) {
      VG_(dmsg)(" dhat: find_Block_containing:\n");
      VG_(dmsg)("             found: %'lu (%'lu cached + %'lu uncached)\n",
                stats__n_fBc_cached + stats__n_fBc_uncached,
                stats__n_fBc_cached,
                stats__n_fBc_uncached);
      VG_(dmsg)("          notfound: %'lu\n", stats__n_fBc_notfound);
      VG_(dmsg)("\n");
   }

   // Create the frame table, and insert the special "[root]" node at index 0.
   frame_tbl = VG_(newFM)(VG_(malloc),
                          "dh.frame_tbl.1",
                          VG_(free),
                          frame_cmp);
   const HChar* root = VG_(strdup)("dh.frame_tbl.2", "[root]");
   Bool present = VG_(addToFM)(frame_tbl, (UWord)root, 0);
   tl_assert(!present);
   next_frame_n = 1;

   // Setup output filename. Nb: it's important to do this now, i.e. as late
   // as possible. If we do it at start-up and the program forks and the
   // output file format string contains a %p (pid) specifier, both the parent
   // and child will incorrectly write to the same file; this happened in
   // 3.3.0.
   HChar* dhat_out_file =
      VG_(expand_file_name)("--dhat-out-file", clo_dhat_out_file);

   fp = VG_(fopen)(dhat_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
                   VKI_S_IRUSR|VKI_S_IWUSR);
   if (!fp) {
      VG_(umsg)("error: can't open DHAT output file '%s'\n", dhat_out_file);
      VG_(free)(dhat_out_file);
      return;
   }

   // Write to data file.
   FP("{\"dhatFileVersion\":1\n");

   // The command.
   const HChar* exe = VG_(args_the_exename);
   FP(",\"cmd\":\"%s", json_escape(exe));
   for (Word i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
      const HChar* arg = *(HChar**)VG_(indexXA)(VG_(args_for_client), i);
      FP(" %s", json_escape(arg));
   }
   FP("\"\n");

   // The PID.
   FP(",\"pid\":%d\n", VG_(getpid)());

   // Times: instruction count at global peak ("mi") and at end ("ei").
   FP(",\"mi\":%llu,\"ei\":%llu\n", g_max_instrs, g_curr_instrs);

   // APs.
   write_APInfos();

   // Frame table.
   FP(",\"ftbl\":\n");

   // The frame table maps strings to numbers. We want to print it ordered by
   // numbers. So we create an array and fill it in from the frame table, then
   // print that.
   UWord n_frames = next_frame_n;
   const HChar** frames =
      VG_(malloc)("dh.frames", n_frames * sizeof(const HChar*));
   VG_(initIterFM)(frame_tbl);
   while (VG_(nextIterFM)(frame_tbl, &keyW, &valW)) {
      const HChar* str = (const HChar*)keyW;
      UWord n = valW;
      frames[n] = str;
   }
   VG_(doneIterFM)(frame_tbl);

   for (UWord i = 0; i < n_frames; i++) {
      FP(" %c\"%s\"\n", i == 0 ? '[' : ',', json_escape(frames[i]));
   }
   FP(" ]\n");
   VG_(free)(frames);

   FP("}\n");

   VG_(fclose)(fp);
   fp = NULL;

   if (VG_(clo_verbosity) == 0) {
      return;
   }

   // Print brief global stats.
   VG_(umsg)("Total:     %'llu bytes in %'llu blocks\n",
             g_total_bytes, g_total_blocks);
   VG_(umsg)("At t-gmax: %'llu bytes in %'llu blocks\n",
             g_max_bytes, g_max_blocks);
   VG_(umsg)("At t-end:  %'llu bytes in %'llu blocks\n",
             g_curr_bytes, g_curr_blocks);
   VG_(umsg)("Reads:     %'llu bytes\n", g_reads_bytes);
   VG_(umsg)("Writes:    %'llu bytes\n", g_writes_bytes);

   // Print a how-to-view-the-profile hint.
   VG_(umsg)("\n");
   VG_(umsg)("To view the resulting profile, open\n");
   VG_(umsg)("  file://%s/%s\n", DHAT_VIEW_DIR, "dh_view.html");
   VG_(umsg)("in a web browser, click on \"Load...\" "
             "and then select the file\n");
   VG_(umsg)("  %s\n", dhat_out_file);
   // NOTE(review): "to the end the displayed page" reads like a typo for
   // "to the end of the displayed page" — user-facing text, fix separately.
   VG_(umsg)("Scroll to the end the displayed page to see a short\n");
   VG_(umsg)("explanation of some of the abbreviations used in the page.\n");

   VG_(free)(dhat_out_file);
}
1459 //------------------------------------------------------------//
1460 //--- Initialisation ---//
1461 //------------------------------------------------------------//
// Called after command line options have been processed.  DHAT needs no
// post-option initialisation.
static void dh_post_clo_init(void)
{
}
// Tool registration: declare DHAT's details, core callbacks, needs, and
// malloc replacements, and create the two global maps.  Runs before command
// line processing.
static void dh_pre_clo_init(void)
{
   VG_(details_name)            ("DHAT");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a dynamic heap analysis tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2010-2018, and GNU GPL'd, by Mozilla Foundation");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);

   // Basic functions.
   VG_(basic_tool_funcs)          (dh_post_clo_init,
                                   dh_instrument,
                                   dh_fini);
//zz
   // Needs.
   VG_(needs_libc_freeres)();
   VG_(needs_cxx_freeres)();
   VG_(needs_command_line_options)(dh_process_cmd_line_option,
                                   dh_print_usage,
                                   dh_print_debug_usage);
//zz   VG_(needs_client_requests)     (dh_handle_client_request);
//zz   VG_(needs_sanity_checks)       (dh_cheap_sanity_check,
//zz                                   dh_expensive_sanity_check);
   VG_(needs_malloc_replacement)  (dh_malloc,
                                   dh___builtin_new,
                                   dh___builtin_vec_new,
                                   dh_memalign,
                                   dh_calloc,
                                   dh_free,
                                   dh___builtin_delete,
                                   dh___builtin_vec_delete,
                                   dh_realloc,
                                   dh_malloc_usable_size,
                                   0 );

   // Track non-instruction memory accesses (e.g. from syscalls) too.
   VG_(track_pre_mem_read)        ( dh_handle_noninsn_read );
   //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   VG_(track_post_mem_write)      ( dh_handle_noninsn_write );

   // The maps and caches must not have been created yet.
   tl_assert(!interval_tree);
   tl_assert(!fbc_cache0);
   tl_assert(!fbc_cache1);

   // Live blocks, keyed (boxed) by address interval.
   interval_tree = VG_(newFM)( VG_(malloc),
                               "dh.interval_tree.1",
                               VG_(free),
                               interval_tree_Cmp );

   // Per-allocation-point info, keyed by ExeContext* (unboxed compare).
   apinfo = VG_(newFM)( VG_(malloc),
                        "dh.apinfo.1",
                        VG_(free),
                        NULL/*unboxedcmp*/ );
}
1521 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
1523 //--------------------------------------------------------------------//
1524 //--- end dh_main.c ---//
1525 //--------------------------------------------------------------------//