2 //--------------------------------------------------------------------//
3 //--- DHAT: a Dynamic Heap Analysis Tool dh_main.c ---//
4 //--------------------------------------------------------------------//
7 This file is part of DHAT, a Valgrind tool for profiling the
8 heap usage of programs.
10 Copyright (C) 2010-2018 Mozilla Foundation
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 /* Contributed by Julian Seward <jseward@acm.org> */
31 #include "pub_tool_basics.h"
32 #include "pub_tool_clientstate.h"
33 #include "pub_tool_libcbase.h"
34 #include "pub_tool_libcassert.h"
35 #include "pub_tool_libcfile.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_libcproc.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_options.h"
41 #include "pub_tool_replacemalloc.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
45 #define HISTOGRAM_SIZE_LIMIT 1024
47 //------------------------------------------------------------//
49 //------------------------------------------------------------//
51 // Values for the entire run.
52 static ULong g_total_blocks
= 0;
53 static ULong g_total_bytes
= 0;
56 static ULong g_curr_blocks
= 0;
57 static ULong g_curr_bytes
= 0;
58 static ULong g_curr_instrs
= 0; // incremented from generated code
60 // Values at the global max, i.e. when g_curr_bytes peaks.
61 static ULong g_max_blocks
= 0;
62 static ULong g_max_bytes
= 0;
63 static ULong g_max_instrs
= 0;
65 // Values for the entire run. Computed at the end.
66 static ULong g_reads_bytes
= 0;
67 static ULong g_writes_bytes
= 0;
69 //------------------------------------------------------------//
70 //--- an Interval Tree of live blocks ---//
71 //------------------------------------------------------------//
73 /* Tracks information about live blocks. */
78 ExeContext
* ap
; /* allocation ec */
79 ULong allocd_at
; /* instruction number */
82 /* Approx histogram, one byte per payload byte. Counts latch up
83 therefore at 0xFFFF. Can be NULL if the block is resized or if
84 the block is larger than HISTOGRAM_SIZE_LIMIT. */
85 UShort
* histoW
; /* [0 .. req_szB-1] */
89 /* May not contain zero-sized blocks. May not contain
90 overlapping blocks. */
91 static WordFM
* interval_tree
= NULL
; /* WordFM* Block* void */
93 /* Here's the comparison function. Since the tree is required
94 to contain non-zero sized, non-overlapping blocks, it's good
95 enough to consider any overlap as a match. */
96 static Word
interval_tree_Cmp ( UWord k1
, UWord k2
)
98 Block
* b1
= (Block
*)k1
;
99 Block
* b2
= (Block
*)k2
;
100 tl_assert(b1
->req_szB
> 0);
101 tl_assert(b2
->req_szB
> 0);
102 if (b1
->payload
+ b1
->req_szB
<= b2
->payload
) return -1;
103 if (b2
->payload
+ b2
->req_szB
<= b1
->payload
) return 1;
107 // 2-entry cache for find_Block_containing
108 static Block
* fbc_cache0
= NULL
;
109 static Block
* fbc_cache1
= NULL
;
111 static UWord stats__n_fBc_cached
= 0;
112 static UWord stats__n_fBc_uncached
= 0;
113 static UWord stats__n_fBc_notfound
= 0;
115 static Block
* find_Block_containing ( Addr a
)
117 if (LIKELY(fbc_cache0
118 && fbc_cache0
->payload
<= a
119 && a
< fbc_cache0
->payload
+ fbc_cache0
->req_szB
)) {
121 stats__n_fBc_cached
++;
124 if (LIKELY(fbc_cache1
125 && fbc_cache1
->payload
<= a
126 && a
< fbc_cache1
->payload
+ fbc_cache1
->req_szB
)) {
127 // found at 1; swap 0 and 1
128 Block
* tmp
= fbc_cache0
;
129 fbc_cache0
= fbc_cache1
;
131 stats__n_fBc_cached
++;
139 Bool found
= VG_(lookupFM
)( interval_tree
,
140 &foundkey
, &foundval
, (UWord
)&fake
);
142 stats__n_fBc_notfound
++;
145 tl_assert(foundval
== 0); // we don't store vals in the interval tree
146 tl_assert(foundkey
!= 1);
147 Block
* res
= (Block
*)foundkey
;
148 tl_assert(res
!= &fake
);
149 // put at the top position
150 fbc_cache1
= fbc_cache0
;
152 stats__n_fBc_uncached
++;
156 // delete a block; asserts if not found. (viz, 'a' must be
157 // known to be present.)
158 static void delete_Block_starting_at ( Addr a
)
163 Bool found
= VG_(delFromFM
)( interval_tree
,
164 NULL
, NULL
, (Addr
)&fake
);
166 fbc_cache0
= fbc_cache1
= NULL
;
170 //------------------------------------------------------------//
171 //--- a FM of allocation points (APs) ---//
172 //------------------------------------------------------------//
176 // The allocation point that we're summarising stats for.
179 // Total number of blocks and bytes allocated by this AP.
183 // The current number of blocks and bytes live for this AP.
187 // Values at the AP max, i.e. when this AP's curr_bytes peaks.
188 ULong max_blocks
; // Blocks at the AP max.
189 ULong max_bytes
; // The AP max, measured in bytes.
191 // Values at the global max.
192 ULong at_tgmax_blocks
;
193 ULong at_tgmax_bytes
;
195 // Total lifetimes of all blocks allocated by this AP. Includes blocks
196 // explicitly freed and blocks implicitly freed at termination.
197 ULong total_lifetimes_instrs
;
199 // Number of blocks freed by this AP. (Only used in assertions.)
202 // Total number of reads and writes in all blocks allocated
207 /* Histogram information. We maintain a histogram aggregated for
208 all retiring Blocks allocated by this AP, but only if:
209 - this AP has only ever allocated objects of one size
210 - that size is <= HISTOGRAM_SIZE_LIMIT
211 What we need therefore is a mechanism to see if this AP
212 has only ever allocated blocks of one size.
215 Unknown because no retirement yet
216 Exactly xsize all retiring blocks are of this size
217 Mixed multiple different sizes seen
219 enum { Unknown
=999, Exactly
, Mixed
} xsize_tag
;
221 UInt
* histo
; /* [0 .. xsize-1] */
225 /* maps ExeContext*'s to APInfo*'s. Note that the keys must match the
226 .ap field in the values. */
227 static WordFM
* apinfo
= NULL
; /* WordFM* ExeContext* APInfo* */
230 // Are we at peak memory? If so, update at_tgmax_blocks and at_tgmax_bytes in
231 // all APInfos. Note that this is moderately expensive so we avoid calling it
232 // on every allocation.
233 static void check_for_peak(void)
235 if (g_curr_bytes
== g_max_bytes
) {
236 // It's a peak. (If there are multiple equal peaks we record the latest
239 VG_(initIterFM
)(apinfo
);
240 while (VG_(nextIterFM
)(apinfo
, &keyW
, &valW
)) {
241 APInfo
* api
= (APInfo
*)valW
;
242 tl_assert(api
&& api
->ap
== (ExeContext
*)keyW
);
243 api
->at_tgmax_blocks
= api
->curr_blocks
;
244 api
->at_tgmax_bytes
= api
->curr_bytes
;
246 VG_(doneIterFM
)(apinfo
);
250 /* 'bk' is being introduced (has just been allocated). Find the
251 relevant APInfo entry for it, or create one, based on the block's
252 allocation EC. Then, update the APInfo to the extent that we
253 actually can, to reflect the allocation. */
254 static void intro_Block ( Block
* bk
)
262 Bool found
= VG_(lookupFM
)( apinfo
,
263 &keyW
, &valW
, (UWord
)bk
->ap
);
266 tl_assert(keyW
== (UWord
)bk
->ap
);
268 api
= VG_(malloc
)( "dh.intro_Block.1", sizeof(APInfo
) );
269 VG_(memset
)(api
, 0, sizeof(*api
));
271 Bool present
= VG_(addToFM
)( apinfo
,
272 (UWord
)bk
->ap
, (UWord
)api
);
275 tl_assert(api
->freed_blocks
== 0);
276 api
->xsize_tag
= Unknown
;
278 if (0) VG_(printf
)("api %p --> Unknown\n", api
);
281 tl_assert(api
->ap
== bk
->ap
);
283 // Update global stats first.
286 g_total_bytes
+= bk
->req_szB
;
289 g_curr_bytes
+= bk
->req_szB
;
290 if (g_curr_bytes
> g_max_bytes
) {
291 g_max_blocks
= g_curr_blocks
;
292 g_max_bytes
= g_curr_bytes
;
293 g_max_instrs
= g_curr_instrs
;
296 // Now update APInfo stats.
299 api
->total_bytes
+= bk
->req_szB
;
302 api
->curr_bytes
+= bk
->req_szB
;
303 if (api
->curr_bytes
> api
->max_bytes
) {
304 api
->max_blocks
= api
->curr_blocks
;
305 api
->max_bytes
= api
->curr_bytes
;
309 /* 'bk' is retiring (being freed). Find the relevant APInfo entry for
310 it, which must already exist. Then, fold info from 'bk' into that
311 entry. 'because_freed' is True if the block is retiring because
312 the client has freed it. If it is False then the block is retiring
313 because the program has finished, in which case we want to skip the
314 updates of the total blocks live etc for this AP, but still fold in
315 the access counts and histo data that have so far accumulated for
317 static void retire_Block ( Block
* bk
, Bool because_freed
)
325 Bool found
= VG_(lookupFM
)( apinfo
,
326 &keyW
, &valW
, (UWord
)bk
->ap
);
330 tl_assert(api
->ap
== bk
->ap
);
332 // update stats following this free.
334 VG_(printf
)("ec %p api->c_by_l %llu bk->rszB %llu\n",
335 bk
->ap
, api
->curr_bytes
, (ULong
)bk
->req_szB
);
338 // Total bytes is coming down from a possible peak.
341 // Then update global stats.
342 tl_assert(g_curr_blocks
>= 1);
343 tl_assert(g_curr_bytes
>= bk
->req_szB
);
345 g_curr_bytes
-= bk
->req_szB
;
347 // Then update APInfo stats.
348 tl_assert(api
->curr_blocks
>= 1);
349 tl_assert(api
->curr_bytes
>= bk
->req_szB
);
351 api
->curr_bytes
-= bk
->req_szB
;
356 tl_assert(bk
->allocd_at
<= g_curr_instrs
);
357 api
->total_lifetimes_instrs
+= (g_curr_instrs
- bk
->allocd_at
);
360 api
->reads_bytes
+= bk
->reads_bytes
;
361 api
->writes_bytes
+= bk
->writes_bytes
;
362 g_reads_bytes
+= bk
->reads_bytes
;
363 g_writes_bytes
+= bk
->writes_bytes
;
365 // histo stuff. First, do state transitions for xsize/xsize_tag.
366 switch (api
->xsize_tag
) {
369 tl_assert(api
->xsize
== 0);
370 tl_assert(api
->freed_blocks
== 1 || api
->freed_blocks
== 0);
371 tl_assert(!api
->histo
);
372 api
->xsize_tag
= Exactly
;
373 api
->xsize
= bk
->req_szB
;
374 if (0) VG_(printf
)("api %p --> Exactly(%lu)\n", api
, api
->xsize
);
375 // and allocate the histo
377 api
->histo
= VG_(malloc
)("dh.retire_Block.1",
378 api
->xsize
* sizeof(UInt
));
379 VG_(memset
)(api
->histo
, 0, api
->xsize
* sizeof(UInt
));
384 //tl_assert(api->freed_blocks > 1);
385 if (bk
->req_szB
!= api
->xsize
) {
386 if (0) VG_(printf
)("api %p --> Mixed(%lu -> %lu)\n",
387 api
, api
->xsize
, bk
->req_szB
);
388 api
->xsize_tag
= Mixed
;
390 // deallocate the histo, if any
392 VG_(free
)(api
->histo
);
399 //tl_assert(api->freed_blocks > 1);
406 // See if we can fold the histo data from this block into
407 // the data for the AP
408 if (api
->xsize_tag
== Exactly
&& api
->histo
&& bk
->histoW
) {
409 tl_assert(api
->xsize
== bk
->req_szB
);
411 for (i
= 0; i
< api
->xsize
; i
++) {
412 // FIXME: do something better in case of overflow of api->histo[..]
413 // Right now, at least don't let it overflow/wrap around
414 if (api
->histo
[i
] <= 0xFFFE0000)
415 api
->histo
[i
] += (UInt
)bk
->histoW
[i
];
417 if (0) VG_(printf
)("fold in, AP = %p\n", api
);
422 VG_(printf
)("block retiring, histo %lu: ", bk
->req_szB
);
424 for (i
= 0; i
< bk
->req_szB
; i
++)
425 VG_(printf
)("%u ", (UInt
)bk
->histoB
[i
]);
428 VG_(printf
)("block retiring, no histo %lu\n", bk
->req_szB
);
433 /* This handles block resizing. When a block with AP 'ec' has a
434 size change of 'delta', call here to update the APInfo. */
435 static void resize_Block(ExeContext
* ec
, SizeT old_req_szB
, SizeT new_req_szB
)
437 Long delta
= (Long
)new_req_szB
- (Long
)old_req_szB
;
441 Bool found
= VG_(lookupFM
)( apinfo
,
442 &keyW
, &valW
, (UWord
)ec
);
446 tl_assert(api
->ap
== ec
);
449 tl_assert(api
->curr_bytes
>= -delta
);
450 tl_assert(g_curr_bytes
>= -delta
);
453 // Total bytes might be coming down from a possible peak.
457 // Note: we treat realloc() like malloc() + free() for total counts, i.e. we
458 // increment total_blocks by 1 and increment total_bytes by new_req_szB.
460 // A reasonable alternative would be to leave total_blocks unchanged and
461 // increment total_bytes by delta (but only if delta is positive). But then
462 // calls to realloc wouldn't be counted towards the total_blocks count,
463 // which is undesirable.
465 // Update global stats first.
468 g_total_bytes
+= new_req_szB
;
470 g_curr_blocks
+= 0; // unchanged
471 g_curr_bytes
+= delta
;
472 if (g_curr_bytes
> g_max_bytes
) {
473 g_max_blocks
= g_curr_blocks
;
474 g_max_bytes
= g_curr_bytes
;
475 g_max_instrs
= g_curr_instrs
;
478 // Now update APInfo stats.
481 api
->total_bytes
+= new_req_szB
;
483 api
->curr_blocks
+= 0; // unchanged
484 api
->curr_bytes
+= delta
;
485 if (api
->curr_bytes
> api
->max_bytes
) {
486 api
->max_blocks
= api
->curr_blocks
;
487 api
->max_bytes
= api
->curr_bytes
;
492 //------------------------------------------------------------//
493 //--- update both Block and APInfos after {m,re}alloc/free ---//
494 //------------------------------------------------------------//
497 void* new_block ( ThreadId tid
, void* p
, SizeT req_szB
, SizeT req_alignB
,
500 tl_assert(p
== NULL
); // don't handle custom allocators right now
501 SizeT actual_szB
/*, slop_szB*/;
503 if ((SSizeT
)req_szB
< 0) return NULL
;
506 req_szB
= 1; /* can't allow zero-sized blocks in the interval tree */
508 // Allocate and zero if necessary
510 p
= VG_(cli_malloc
)( req_alignB
, req_szB
);
514 if (is_zeroed
) VG_(memset
)(p
, 0, req_szB
);
515 actual_szB
= VG_(cli_malloc_usable_size
)(p
);
516 tl_assert(actual_szB
>= req_szB
);
517 /* slop_szB = actual_szB - req_szB; */
522 // Make new Block, add to interval_tree.
523 Block
* bk
= VG_(malloc
)("dh.new_block.1", sizeof(Block
));
524 bk
->payload
= (Addr
)p
;
525 bk
->req_szB
= req_szB
;
526 bk
->ap
= VG_(record_ExeContext
)(tid
, 0/*first word delta*/);
527 bk
->allocd_at
= g_curr_instrs
;
529 bk
->writes_bytes
= 0;
530 // set up histogram array, if the block isn't too large
532 if (req_szB
<= HISTOGRAM_SIZE_LIMIT
) {
533 bk
->histoW
= VG_(malloc
)("dh.new_block.2", req_szB
* sizeof(UShort
));
534 VG_(memset
)(bk
->histoW
, 0, req_szB
* sizeof(UShort
));
537 Bool present
= VG_(addToFM
)( interval_tree
, (UWord
)bk
, (UWord
)0/*no val*/);
539 fbc_cache0
= fbc_cache1
= NULL
;
543 if (0) VG_(printf
)("ALLOC %lu -> %p\n", req_szB
, p
);
549 void die_block ( void* p
, Bool custom_free
)
551 tl_assert(!custom_free
); // at least for now
553 Block
* bk
= find_Block_containing( (Addr
)p
);
556 return; // bogus free
559 tl_assert(bk
->req_szB
> 0);
560 // assert the block finder is behaving sanely
561 tl_assert(bk
->payload
<= (Addr
)p
);
562 tl_assert( (Addr
)p
< bk
->payload
+ bk
->req_szB
);
564 if (bk
->payload
!= (Addr
)p
) {
565 return; // bogus free
568 if (0) VG_(printf
)(" FREE %p %llu\n",
569 p
, g_curr_instrs
- bk
->allocd_at
);
571 retire_Block(bk
, True
/*because_freed*/);
573 VG_(cli_free
)( (void*)bk
->payload
);
574 delete_Block_starting_at( bk
->payload
);
576 VG_(free
)( bk
->histoW
);
584 void* renew_block ( ThreadId tid
, void* p_old
, SizeT new_req_szB
)
586 if (0) VG_(printf
)("REALL %p %lu\n", p_old
, new_req_szB
);
589 tl_assert(new_req_szB
> 0); // map 0 to 1
591 // Find the old block.
592 Block
* bk
= find_Block_containing( (Addr
)p_old
);
594 return NULL
; // bogus realloc
597 tl_assert(bk
->req_szB
> 0);
598 // assert the block finder is behaving sanely
599 tl_assert(bk
->payload
<= (Addr
)p_old
);
600 tl_assert( (Addr
)p_old
< bk
->payload
+ bk
->req_szB
);
602 if (bk
->payload
!= (Addr
)p_old
) {
603 return NULL
; // bogus realloc
606 // Keeping the histogram alive in any meaningful way across
607 // block resizing is too darn complicated. Just throw it away.
609 VG_(free
)(bk
->histoW
);
613 // Actually do the allocation, if necessary.
614 if (new_req_szB
<= bk
->req_szB
) {
616 // New size is smaller or same; block not moved.
617 resize_Block(bk
->ap
, bk
->req_szB
, new_req_szB
);
618 bk
->req_szB
= new_req_szB
;
623 // New size is bigger; make new block, copy shared contents, free old.
624 p_new
= VG_(cli_malloc
)(VG_(clo_alignment
), new_req_szB
);
626 // Nb: if realloc fails, NULL is returned but the old block is not
627 // touched. What an awful function.
630 tl_assert(p_new
!= p_old
);
632 VG_(memcpy
)(p_new
, p_old
, bk
->req_szB
);
633 VG_(cli_free
)(p_old
);
635 // Since the block has moved, we need to re-insert it into the
636 // interval tree at the new place. Do this by removing
638 delete_Block_starting_at( (Addr
)p_old
);
639 // now 'bk' is no longer in the tree, but the Block itself
642 // Update the metadata.
643 resize_Block(bk
->ap
, bk
->req_szB
, new_req_szB
);
644 bk
->payload
= (Addr
)p_new
;
645 bk
->req_szB
= new_req_szB
;
649 = VG_(addToFM
)( interval_tree
, (UWord
)bk
, (UWord
)0/*no val*/);
651 fbc_cache0
= fbc_cache1
= NULL
;
660 //------------------------------------------------------------//
661 //--- malloc() et al replacement wrappers ---//
662 //------------------------------------------------------------//
664 static void* dh_malloc ( ThreadId tid
, SizeT szB
)
666 return new_block( tid
, NULL
, szB
, VG_(clo_alignment
), /*is_zeroed*/False
);
669 static void* dh___builtin_new ( ThreadId tid
, SizeT szB
)
671 return new_block( tid
, NULL
, szB
, VG_(clo_alignment
), /*is_zeroed*/False
);
674 static void* dh___builtin_vec_new ( ThreadId tid
, SizeT szB
)
676 return new_block( tid
, NULL
, szB
, VG_(clo_alignment
), /*is_zeroed*/False
);
679 static void* dh_calloc ( ThreadId tid
, SizeT m
, SizeT szB
)
681 return new_block( tid
, NULL
, m
*szB
, VG_(clo_alignment
), /*is_zeroed*/True
);
684 static void *dh_memalign ( ThreadId tid
, SizeT alignB
, SizeT szB
)
686 return new_block( tid
, NULL
, szB
, alignB
, False
);
689 static void dh_free ( ThreadId tid
__attribute__((unused
)), void* p
)
691 die_block( p
, /*custom_free*/False
);
694 static void dh___builtin_delete ( ThreadId tid
, void* p
)
696 die_block( p
, /*custom_free*/False
);
699 static void dh___builtin_vec_delete ( ThreadId tid
, void* p
)
701 die_block( p
, /*custom_free*/False
);
704 static void* dh_realloc ( ThreadId tid
, void* p_old
, SizeT new_szB
)
// realloc() replacement.  NOTE(review): the extraction of this file has
// dropped the original lines between the two return statements below —
// presumably they dispatch on p_old/new_szB (realloc(NULL, n) behaves
// as malloc; otherwise the block is resized via renew_block).  Recover
// the original dh_main.c to confirm before relying on this.
707 return dh_malloc(tid
, new_szB
);
713 return renew_block(tid
, p_old
, new_szB
);
716 static SizeT
dh_malloc_usable_size ( ThreadId tid
, void* p
)
718 Block
* bk
= find_Block_containing( (Addr
)p
);
719 return bk
? bk
->req_szB
: 0;
723 //------------------------------------------------------------//
724 //--- memory references ---//
725 //------------------------------------------------------------//
728 void inc_histo_for_block ( Block
* bk
, Addr addr
, UWord szB
)
730 UWord i
, offMin
, offMax1
;
731 offMin
= addr
- bk
->payload
;
732 tl_assert(offMin
< bk
->req_szB
);
733 offMax1
= offMin
+ szB
;
734 if (offMax1
> bk
->req_szB
)
735 offMax1
= bk
->req_szB
;
736 //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB);
737 for (i
= offMin
; i
< offMax1
; i
++) {
738 UShort n
= bk
->histoW
[i
];
745 void dh_handle_write ( Addr addr
, UWord szB
)
747 Block
* bk
= find_Block_containing(addr
);
749 bk
->writes_bytes
+= szB
;
751 inc_histo_for_block(bk
, addr
, szB
);
756 void dh_handle_read ( Addr addr
, UWord szB
)
758 Block
* bk
= find_Block_containing(addr
);
760 bk
->reads_bytes
+= szB
;
762 inc_histo_for_block(bk
, addr
, szB
);
767 // Handle reads and writes by syscalls (read == kernel
768 // reads user space, write == kernel writes user space).
769 // Assumes no such read or write spans a heap block
770 // boundary and so we can treat it just as one giant
773 void dh_handle_noninsn_read ( CorePart part
, ThreadId tid
, const HChar
* s
,
774 Addr base
, SizeT size
)
778 dh_handle_read(base
, size
);
780 case Vg_CoreSysCallArgInMem
:
782 case Vg_CoreTranslate
:
790 void dh_handle_noninsn_write ( CorePart part
, ThreadId tid
,
791 Addr base
, SizeT size
)
795 case Vg_CoreClientReq
:
796 dh_handle_write(base
, size
);
806 //------------------------------------------------------------//
807 //--- Instrumentation ---//
808 //------------------------------------------------------------//
810 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
811 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
812 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
813 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
814 #define assign(_t, _e) IRStmt_WrTmp((_t), (_e))
817 void add_counter_update(IRSB
* sbOut
, Int n
)
819 #if defined(VG_BIGENDIAN)
821 #elif defined(VG_LITTLEENDIAN)
824 # error "Unknown endianness"
826 // Add code to increment 'g_curr_instrs' by 'n', like this:
827 // WrTmp(t1, Load64(&g_curr_instrs))
828 // WrTmp(t2, Add64(RdTmp(t1), Const(n)))
829 // Store(&g_curr_instrs, t2)
830 IRTemp t1
= newIRTemp(sbOut
->tyenv
, Ity_I64
);
831 IRTemp t2
= newIRTemp(sbOut
->tyenv
, Ity_I64
);
832 IRExpr
* counter_addr
= mkIRExpr_HWord( (HWord
)&g_curr_instrs
);
834 IRStmt
* st1
= assign(t1
, IRExpr_Load(END
, Ity_I64
, counter_addr
));
835 IRStmt
* st2
= assign(t2
, binop(Iop_Add64
, mkexpr(t1
), mkU64(n
)));
836 IRStmt
* st3
= IRStmt_Store(END
, counter_addr
, mkexpr(t2
));
838 addStmtToIRSB( sbOut
, st1
);
839 addStmtToIRSB( sbOut
, st2
);
840 addStmtToIRSB( sbOut
, st3
);
844 void addMemEvent(IRSB
* sbOut
, Bool isWrite
, Int szB
, IRExpr
* addr
,
847 IRType tyAddr
= Ity_INVALID
;
848 const HChar
* hName
= NULL
;
850 IRExpr
** argv
= NULL
;
853 const Int THRESH
= 4096 * 4; // somewhat arbitrary
854 const Int rz_szB
= VG_STACK_REDZONE_SZB
;
856 tyAddr
= typeOfIRExpr( sbOut
->tyenv
, addr
);
857 tl_assert(tyAddr
== Ity_I32
|| tyAddr
== Ity_I64
);
860 hName
= "dh_handle_write";
861 hAddr
= &dh_handle_write
;
863 hName
= "dh_handle_read";
864 hAddr
= &dh_handle_read
;
867 argv
= mkIRExprVec_2( addr
, mkIRExpr_HWord(szB
) );
869 /* Add the helper. */
873 di
= unsafeIRDirty_0_N( 2/*regparms*/,
874 hName
, VG_(fnptr_to_fnentry
)( hAddr
),
877 /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
878 some arbitrary N. If that fails then addr is in the range (SP -
879 RZ .. SP + N - RZ). If N is smallish (a page?) then we can say
880 addr is within a page of SP and so can't possibly be a heap
881 access, and so can be skipped. */
882 IRTemp sp
= newIRTemp(sbOut
->tyenv
, tyAddr
);
883 addStmtToIRSB( sbOut
, assign(sp
, IRExpr_Get(goff_sp
, tyAddr
)));
885 IRTemp sp_minus_rz
= newIRTemp(sbOut
->tyenv
, tyAddr
);
890 ? binop(Iop_Sub32
, mkexpr(sp
), mkU32(rz_szB
))
891 : binop(Iop_Sub64
, mkexpr(sp
), mkU64(rz_szB
)))
894 IRTemp diff
= newIRTemp(sbOut
->tyenv
, tyAddr
);
899 ? binop(Iop_Sub32
, addr
, mkexpr(sp_minus_rz
))
900 : binop(Iop_Sub64
, addr
, mkexpr(sp_minus_rz
)))
903 IRTemp guard
= newIRTemp(sbOut
->tyenv
, Ity_I1
);
908 ? binop(Iop_CmpLT32U
, mkU32(THRESH
), mkexpr(diff
))
909 : binop(Iop_CmpLT64U
, mkU64(THRESH
), mkexpr(diff
)))
911 di
->guard
= mkexpr(guard
);
913 addStmtToIRSB( sbOut
, IRStmt_Dirty(di
) );
917 IRSB
* dh_instrument ( VgCallbackClosure
* closure
,
919 const VexGuestLayout
* layout
,
920 const VexGuestExtents
* vge
,
921 const VexArchInfo
* archinfo_host
,
922 IRType gWordTy
, IRType hWordTy
)
926 IRTypeEnv
* tyenv
= sbIn
->tyenv
;
928 const Int goff_sp
= layout
->offset_SP
;
930 // We increment the instruction count in two places:
931 // - just before any Ist_Exit statements;
932 // - just before the IRSB's end.
933 // In the former case, we zero 'n' and then continue instrumenting.
935 sbOut
= deepCopyIRSBExceptStmts(sbIn
);
937 // Copy verbatim any IR preamble preceding the first IMark
939 while (i
< sbIn
->stmts_used
&& sbIn
->stmts
[i
]->tag
!= Ist_IMark
) {
940 addStmtToIRSB( sbOut
, sbIn
->stmts
[i
] );
944 for (/*use current i*/; i
< sbIn
->stmts_used
; i
++) {
945 IRStmt
* st
= sbIn
->stmts
[i
];
947 if (!st
|| st
->tag
== Ist_NoOp
) continue;
958 // Add an increment before the Exit statement, then reset 'n'.
959 add_counter_update(sbOut
, n
);
966 IRExpr
* data
= st
->Ist
.WrTmp
.data
;
967 if (data
->tag
== Iex_Load
) {
968 IRExpr
* aexpr
= data
->Iex
.Load
.addr
;
969 // Note also, endianness info is ignored. I guess
970 // that's not interesting.
971 addMemEvent( sbOut
, False
/*!isWrite*/,
972 sizeofIRType(data
->Iex
.Load
.ty
),
979 IRExpr
* data
= st
->Ist
.Store
.data
;
980 IRExpr
* aexpr
= st
->Ist
.Store
.addr
;
981 addMemEvent( sbOut
, True
/*isWrite*/,
982 sizeofIRType(typeOfIRExpr(tyenv
, data
)),
989 IRDirty
* d
= st
->Ist
.Dirty
.details
;
990 if (d
->mFx
!= Ifx_None
) {
991 /* This dirty helper accesses memory. Collect the details. */
992 tl_assert(d
->mAddr
!= NULL
);
993 tl_assert(d
->mSize
!= 0);
995 // Large (eg. 28B, 108B, 512B on x86) data-sized
996 // instructions will be done inaccurately, but they're
997 // very rare and this avoids errors from hitting more
998 // than two cache lines in the simulation.
999 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
)
1000 addMemEvent( sbOut
, False
/*!isWrite*/,
1001 dataSize
, d
->mAddr
, goff_sp
);
1002 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
)
1003 addMemEvent( sbOut
, True
/*isWrite*/,
1004 dataSize
, d
->mAddr
, goff_sp
);
1006 tl_assert(d
->mAddr
== NULL
);
1007 tl_assert(d
->mSize
== 0);
1013 /* We treat it as a read and a write of the location. I
1014 think that is the same behaviour as it was before IRCAS
1015 was introduced, since prior to that point, the Vex
1016 front ends would translate a lock-prefixed instruction
1017 into a (normal) read followed by a (normal) write. */
1019 IRCAS
* cas
= st
->Ist
.CAS
.details
;
1020 tl_assert(cas
->addr
!= NULL
);
1021 tl_assert(cas
->dataLo
!= NULL
);
1022 dataSize
= sizeofIRType(typeOfIRExpr(tyenv
, cas
->dataLo
));
1023 if (cas
->dataHi
!= NULL
)
1024 dataSize
*= 2; /* since it's a doubleword-CAS */
1025 addMemEvent( sbOut
, False
/*!isWrite*/,
1026 dataSize
, cas
->addr
, goff_sp
);
1027 addMemEvent( sbOut
, True
/*isWrite*/,
1028 dataSize
, cas
->addr
, goff_sp
);
1034 if (st
->Ist
.LLSC
.storedata
== NULL
) {
1036 dataTy
= typeOfIRTemp(tyenv
, st
->Ist
.LLSC
.result
);
1037 addMemEvent( sbOut
, False
/*!isWrite*/,
1038 sizeofIRType(dataTy
),
1039 st
->Ist
.LLSC
.addr
, goff_sp
);
1042 dataTy
= typeOfIRExpr(tyenv
, st
->Ist
.LLSC
.storedata
);
1043 addMemEvent( sbOut
, True
/*isWrite*/,
1044 sizeofIRType(dataTy
),
1045 st
->Ist
.LLSC
.addr
, goff_sp
);
1054 addStmtToIRSB( sbOut
, st
);
1058 // Add an increment before the SB end.
1059 add_counter_update(sbOut
, n
);
1071 //------------------------------------------------------------//
1072 //--- Command line args ---//
1073 //------------------------------------------------------------//
1075 static const HChar
* clo_dhat_out_file
= "dhat.out.%p";
1077 static Bool
dh_process_cmd_line_option(const HChar
* arg
)
1079 if VG_STR_CLO(arg
, "--dhat-out-file", clo_dhat_out_file
) {}
1082 return VG_(replacement_malloc_process_cmd_line_option
)(arg
);
1087 static void dh_print_usage(void)
// Print DHAT's --help text.  NOTE(review): the VG_(printf)( ... )
// wrapper lines around the help string below, and the string's exact
// internal spacing, were lost in extraction — recover them from the
// original dh_main.c.
1090 " --dhat-out-file=<file> output file name [dhat.out.%%p]\n"
1094 static void dh_print_debug_usage(void)
1102 //------------------------------------------------------------//
1103 //--- Finalisation ---//
1104 //------------------------------------------------------------//
1106 // File format notes.
1108 // - The files are JSON, because it's a widely-used format and saves us having
1109 // to write a parser in dh_view.js.
1111 // - We use a comma-first style for the generated JSON. Comma-first style
1112 // moves the special case for arrays/objects from the last item to the
1113 // first. This helps in cases where you can't easily tell in advance the
1114 // size of arrays/objects, such as iterating over a WordFM (because
1115 // VG_(sizeFM) is O(n) rather than O(1)), and iterating over stack frames
1116 // using VG_(apply_ExeContext) in combination with an InlIpCursor.
1118 // - We use short field names and minimal whitespace to minimize file sizes.
1122 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
1124 // The frame table holds unique frames.
1125 static WordFM
* frame_tbl
= NULL
;
1126 static UWord next_frame_n
= 0;
1128 static Word
frame_cmp(UWord a
, UWord b
)
1130 return VG_(strcmp
)((const HChar
*)a
, (const HChar
*)b
);
1133 static HChar
hex_digit_to_ascii_char(UChar d
)
1136 return (d
< 10) ? ('0' + d
) : ('a' + (d
- 10));
1139 // For JSON, we must escape double quote, backslash, and 0x00..0x1f.
1141 // Returns the original string if no escaping was required. Returns a pointer
1142 // to a static buffer if escaping was required. Therefore, the return value is
1143 // only valid until the next call to this function.
1144 static const HChar
* json_escape(const HChar
* s
)
1146 static HChar
* buf
= NULL
;
1147 static SizeT bufcap
= 0;
1149 // Do we need any escaping?
1154 if (c
== '"' || c
== '\\') {
1156 } else if (c
<= 0x1f) {
1164 // No escaping needed.
1168 // Escaping needed. (The +1 is for the NUL terminator.) Enlarge buf if
1170 SizeT newcap
= len
+ extra
+ 1;
1171 if (bufcap
< newcap
) {
1172 buf
= VG_(realloc
)("dh.json", buf
, newcap
);
1183 } else if (c
== '\\') {
1186 } else if (c
<= 0x1f) {
1191 *q
++ = hex_digit_to_ascii_char((c
& 0x00f0) >> 4);
1192 *q
++ = hex_digit_to_ascii_char(c
& 0x000f);
1203 static void write_APInfo_frame(UInt n
, DiEpoch ep
, Addr ip
, void* opaque
)
1205 Bool
* is_first
= (Bool
*)opaque
;
1206 InlIPCursor
* iipc
= VG_(new_IIPC
)(ep
, ip
);
1209 const HChar
* buf
= VG_(describe_IP
)(ep
, ip
, iipc
);
1211 // Skip entries in vg_replace_malloc.c (e.g. `malloc`, `calloc`,
1212 // `realloc`, `operator new`) because they're boring and clog up the
1214 if (VG_(strstr
)(buf
, "vg_replace_malloc.c")) {
1218 // If this description has been seen before, get its number. Otherwise,
1219 // give it a new number and put it in the table.
1220 UWord keyW
= 0, valW
= 0;
1222 Bool found
= VG_(lookupFM
)(frame_tbl
, &keyW
, &valW
, (UWord
)buf
);
1224 //const HChar* str = (const HChar*)keyW;
1225 //tl_assert(0 == VG_(strcmp)(buf, str));
1228 // `buf` is a static buffer, we must copy it.
1229 const HChar
* str
= VG_(strdup
)("dh.frame_tbl.3", buf
);
1230 frame_n
= next_frame_n
++;
1231 Bool present
= VG_(addToFM
)(frame_tbl
, (UWord
)str
, frame_n
);
1232 tl_assert(!present
);
1235 FP("%c%lu", *is_first
? '[' : ',', frame_n
);
1238 } while (VG_(next_IIPC
)(iipc
));
1240 VG_(delete_IIPC
)(iipc
);
1243 static void write_APInfo(APInfo
* api
, Bool is_first
)
1245 tl_assert(api
->total_blocks
>= api
->max_blocks
);
1246 tl_assert(api
->total_bytes
>= api
->max_bytes
);
1248 FP(" %c{\"tb\":%llu,\"tbk\":%llu,\"tli\":%llu\n",
1249 is_first
? '[' : ',',
1250 api
->total_bytes
, api
->total_blocks
, api
->total_lifetimes_instrs
);
1251 FP(" ,\"mb\":%llu,\"mbk\":%llu\n",
1252 api
->max_bytes
, api
->max_blocks
);
1253 FP(" ,\"gb\":%llu,\"gbk\":%llu\n",
1254 api
->at_tgmax_bytes
, api
->at_tgmax_blocks
);
1255 FP(" ,\"fb\":%llu,\"fbk\":%llu\n",
1256 api
->curr_bytes
, api
->curr_blocks
);
1257 FP(" ,\"rb\":%llu,\"wb\":%llu\n",
1258 api
->reads_bytes
, api
->writes_bytes
);
1260 if (api
->histo
&& api
->xsize_tag
== Exactly
) {
1263 // Simple run-length encoding: when N entries in a row have the same
1264 // value M, we print "-N,M". If there is just one in a row, we just
1265 // print "M". This reduces file size significantly.
1268 for (UWord i
= 0; i
< api
->xsize
; i
++) {
1269 UShort h
= api
->histo
[i
];
1271 // Continue current run.
1274 // End of run; print it.
1277 } else if (reps
> 1) {
1278 FP("-%d,%u,", reps
, repval
);
1284 // Print the final run.
1287 } else if (reps
> 1) {
1288 FP("-%d,%u", reps
, repval
);
1295 Bool is_first_frame
= True
;
1296 VG_(apply_ExeContext
)(write_APInfo_frame
, &is_first_frame
, api
->ap
);
1302 static void write_APInfos(void)
1308 VG_(initIterFM
)(apinfo
);
1309 Bool is_first
= True
;
1310 while (VG_(nextIterFM
)(apinfo
, &keyW
, &valW
)) {
1311 APInfo
* api
= (APInfo
*)valW
;
1312 tl_assert(api
&& api
->ap
== (ExeContext
*)keyW
);
1313 write_APInfo(api
, is_first
);
1316 VG_(doneIterFM
)(apinfo
);
1319 // We didn't print any elements. This happens if apinfo is empty.
1326 static void dh_fini(Int exit_status
)
1328 // This function does lots of allocations that it doesn't bother to free,
1329 // because execution is almost over anyway.
1331 // Total bytes might be at a possible peak.
1334 // Before printing statistics, we must harvest various stats (such as
1335 // lifetimes and accesses) for all the blocks that are still alive.
1337 VG_(initIterFM
)( interval_tree
);
1338 while (VG_(nextIterFM
)( interval_tree
, &keyW
, &valW
)) {
1339 Block
* bk
= (Block
*)keyW
;
1340 tl_assert(valW
== 0);
1342 retire_Block(bk
, False
/*!because_freed*/);
1344 VG_(doneIterFM
)( interval_tree
);
1347 if (VG_(clo_stats
)) {
1348 VG_(dmsg
)(" dhat: find_Block_containing:\n");
1349 VG_(dmsg
)(" found: %'lu (%'lu cached + %'lu uncached)\n",
1350 stats__n_fBc_cached
+ stats__n_fBc_uncached
,
1351 stats__n_fBc_cached
,
1352 stats__n_fBc_uncached
);
1353 VG_(dmsg
)(" notfound: %'lu\n", stats__n_fBc_notfound
);
1357 // Create the frame table, and insert the special "[root]" node at index 0.
1358 frame_tbl
= VG_(newFM
)(VG_(malloc
),
1362 const HChar
* root
= VG_(strdup
)("dh.frame_tbl.2", "[root]");
1363 Bool present
= VG_(addToFM
)(frame_tbl
, (UWord
)root
, 0);
1364 tl_assert(!present
);
1367 // Setup output filename. Nb: it's important to do this now, i.e. as late
1368 // as possible. If we do it at start-up and the program forks and the
1369 // output file format string contains a %p (pid) specifier, both the parent
1370 // and child will incorrectly write to the same file; this happened in
1372 HChar
* dhat_out_file
=
1373 VG_(expand_file_name
)("--dhat-out-file", clo_dhat_out_file
);
1375 fp
= VG_(fopen
)(dhat_out_file
, VKI_O_CREAT
|VKI_O_TRUNC
|VKI_O_WRONLY
,
1376 VKI_S_IRUSR
|VKI_S_IWUSR
);
1378 VG_(umsg
)("error: can't open DHAT output file '%s'\n", dhat_out_file
);
1379 VG_(free
)(dhat_out_file
);
1383 // Write to data file.
1384 FP("{\"dhatFileVersion\":1\n");
1387 const HChar
* exe
= VG_(args_the_exename
);
1388 FP(",\"cmd\":\"%s", json_escape(exe
));
1389 for (Word i
= 0; i
< VG_(sizeXA
)(VG_(args_for_client
)); i
++) {
1390 const HChar
* arg
= *(HChar
**)VG_(indexXA
)(VG_(args_for_client
), i
);
1391 FP(" %s", json_escape(arg
));
1396 FP(",\"pid\":%d\n", VG_(getpid
)());
1399 FP(",\"mi\":%llu,\"ei\":%llu\n", g_max_instrs
, g_curr_instrs
);
1407 // The frame table maps strings to numbers. We want to print it ordered by
1408 // numbers. So we create an array and fill it in from the frame table, then
1410 UWord n_frames
= next_frame_n
;
1411 const HChar
** frames
=
1412 VG_(malloc
)("dh.frames", n_frames
* sizeof(const HChar
*));
1413 VG_(initIterFM
)(frame_tbl
);
1414 while (VG_(nextIterFM
)(frame_tbl
, &keyW
, &valW
)) {
1415 const HChar
* str
= (const HChar
*)keyW
;
1419 VG_(doneIterFM
)(frame_tbl
);
1421 for (UWord i
= 0; i
< n_frames
; i
++) {
1422 FP(" %c\"%s\"\n", i
== 0 ? '[' : ',', json_escape(frames
[i
]));
1432 if (VG_(clo_verbosity
) == 0) {
1436 // Print brief global stats.
1437 VG_(umsg
)("Total: %'llu bytes in %'llu blocks\n",
1438 g_total_bytes
, g_total_blocks
);
1439 VG_(umsg
)("At t-gmax: %'llu bytes in %'llu blocks\n",
1440 g_max_bytes
, g_max_blocks
);
1441 VG_(umsg
)("At t-end: %'llu bytes in %'llu blocks\n",
1442 g_curr_bytes
, g_curr_blocks
);
1443 VG_(umsg
)("Reads: %'llu bytes\n", g_reads_bytes
);
1444 VG_(umsg
)("Writes: %'llu bytes\n", g_writes_bytes
);
1446 // Print a how-to-view-the-profile hint.
1448 VG_(umsg
)("To view the resulting profile, open\n");
1449 VG_(umsg
)(" file://%s/%s\n", DHAT_VIEW_DIR
, "dh_view.html");
1450 VG_(umsg
)("in a web browser, click on \"Load...\" "
1451 "and then select the file\n");
1452 VG_(umsg
)(" %s\n", dhat_out_file
);
1453 VG_(umsg
)("Scroll to the end the displayed page to see a short\n");
1454 VG_(umsg
)("explanation of some of the abbreviations used in the page.\n");
1456 VG_(free
)(dhat_out_file
);
1459 //------------------------------------------------------------//
1460 //--- Initialisation ---//
1461 //------------------------------------------------------------//
/* Post-command-line-option initialisation hook registered via
   VG_(basic_tool_funcs).  Nothing to do for DHAT. */
static void dh_post_clo_init(void)
{
}
1467 static void dh_pre_clo_init(void)
1469 VG_(details_name
) ("DHAT");
1470 VG_(details_version
) (NULL
);
1471 VG_(details_description
) ("a dynamic heap analysis tool");
1472 VG_(details_copyright_author
)(
1473 "Copyright (C) 2010-2018, and GNU GPL'd, by Mozilla Foundation");
1474 VG_(details_bug_reports_to
) (VG_BUGS_TO
);
1477 VG_(basic_tool_funcs
) (dh_post_clo_init
,
1482 VG_(needs_libc_freeres
)();
1483 VG_(needs_cxx_freeres
)();
1484 VG_(needs_command_line_options
)(dh_process_cmd_line_option
,
1486 dh_print_debug_usage
);
1487 //zz VG_(needs_client_requests) (dh_handle_client_request);
1488 //zz VG_(needs_sanity_checks) (dh_cheap_sanity_check,
1489 //zz dh_expensive_sanity_check);
1490 VG_(needs_malloc_replacement
) (dh_malloc
,
1492 dh___builtin_vec_new
,
1496 dh___builtin_delete
,
1497 dh___builtin_vec_delete
,
1499 dh_malloc_usable_size
,
1502 VG_(track_pre_mem_read
) ( dh_handle_noninsn_read
);
1503 //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
1504 VG_(track_post_mem_write
) ( dh_handle_noninsn_write
);
1506 tl_assert(!interval_tree
);
1507 tl_assert(!fbc_cache0
);
1508 tl_assert(!fbc_cache1
);
1510 interval_tree
= VG_(newFM
)( VG_(malloc
),
1511 "dh.interval_tree.1",
1513 interval_tree_Cmp
);
1515 apinfo
= VG_(newFM
)( VG_(malloc
),
1518 NULL
/*unboxedcmp*/ );
1521 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init
)
1523 //--------------------------------------------------------------------//
1524 //--- end dh_main.c ---//
1525 //--------------------------------------------------------------------//