2 //--------------------------------------------------------------------//
3 //--- DHAT: a Dynamic Heap Analysis Tool dh_main.c ---//
4 //--------------------------------------------------------------------//
7 This file is part of DHAT, a Valgrind tool for profiling the
8 heap usage of programs.
10 Copyright (C) 2010-2018 Mozilla Foundation
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 /* Contributed by Julian Seward <jseward@acm.org> */
30 #include "pub_tool_basics.h"
31 #include "pub_tool_clientstate.h"
32 #include "pub_tool_clreq.h"
33 #include "pub_tool_libcbase.h"
34 #include "pub_tool_libcassert.h"
35 #include "pub_tool_libcfile.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_libcproc.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_options.h"
41 #include "pub_tool_replacemalloc.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
// NOTE(review): this chunk is a line-mangled extraction; the leading digits on
// lines are the original file's line numbers, and some original lines are
// missing. Code text below is left untouched; only comments are added.
// Global counters summarising the whole run. g_total_* count every allocation
// ever made; g_curr_* track what is live now; g_max_* / g_tgmax_instrs latch
// the state at the global peak of g_curr_bytes; g_reads/writes_bytes
// accumulate access counts from retired blocks.
47 #define HISTOGRAM_SIZE_LIMIT 1024
49 //------------------------------------------------------------//
51 //------------------------------------------------------------//
53 // Values for the entire run.
54 static ULong g_total_blocks
= 0;
55 static ULong g_total_bytes
= 0;
57 // Current values. g_curr_blocks and g_curr_bytes are only used with
59 static ULong g_curr_blocks
= 0;
60 static ULong g_curr_bytes
= 0;
61 static ULong g_curr_instrs
= 0; // incremented from generated code
63 // Values at the global max, i.e. when g_curr_bytes peaks.
64 // Only used with clo_mode=Heap.
65 static ULong g_max_blocks
= 0;
66 static ULong g_max_bytes
= 0;
68 // Time of the global max.
69 static ULong g_tgmax_instrs
= 0;
71 // Values for the entire run. Updated each time a block is retired.
72 // Only used with clo_mode=Heap.
73 static ULong g_reads_bytes
= 0;
74 static ULong g_writes_bytes
= 0;
76 //------------------------------------------------------------//
77 //--- Command line args ---//
78 //------------------------------------------------------------//
// The profiling mode selected by --mode=...; Heap is the default. The enum's
// base value (55) makes accidental zero-initialised ProfileKind values
// distinguishable from a real mode.
80 typedef enum { Heap
=55, Copy
, AdHoc
} ProfileKind
;
82 static ProfileKind clo_mode
= Heap
;
// Output file name template; "%p" is expanded to the process ID.
84 static const HChar
* clo_dhat_out_file
= "dhat.out.%p";
// Parses one DHAT command-line option; unrecognised options fall through to
// the common replacement-malloc option handler.
// NOTE(review): extraction has dropped lines here (the `return True;` arms and
// closing braces are elided in this view) — compare against upstream before
// editing.
86 static Bool
dh_process_cmd_line_option(const HChar
* arg
)
88 if VG_STR_CLO(arg
, "--dhat-out-file", clo_dhat_out_file
) {
90 } else if (VG_XACT_CLO(arg
, "--mode=heap", clo_mode
, Heap
)) {
91 } else if (VG_XACT_CLO(arg
, "--mode=copy", clo_mode
, Copy
)) {
92 } else if (VG_XACT_CLO(arg
, "--mode=ad-hoc", clo_mode
, AdHoc
)) {
95 return VG_(replacement_malloc_process_cmd_line_option
)(arg
);
// Usage strings printed by valgrind --help; debug-usage body is elided here.
101 static void dh_print_usage(void)
104 " --dhat-out-file=<file> output file name [dhat.out.%%p]\n"
105 " --mode=heap|copy|ad-hoc profiling mode\n"
109 static void dh_print_debug_usage(void)
116 //------------------------------------------------------------//
117 //--- an Interval Tree of live blocks ---//
118 //------------------------------------------------------------//
120 /* Tracks information about live blocks. */
// NOTE(review): the Block struct's opening line and its payload/req_szB/
// reads_bytes/writes_bytes fields are elided in this extraction; only the
// fields below are visible. The comparator's uses of ->payload and ->req_szB
// confirm those fields exist.
125 ExeContext
* ec
; /* allocation ec */
126 ULong allocd_at
; /* instruction number */
129 /* Approx histogram, one byte per payload byte. Counts latch up
130 therefore at 0xFFFF. Can be NULL if the block is resized or if
131 the block is larger than HISTOGRAM_SIZE_LIMIT. */
132 UShort
* histoW
; /* [0 .. req_szB-1] */
136 /* May not contain zero-sized blocks. May not contain
137 overlapping blocks. */
138 static WordFM
* interval_tree
= NULL
; /* WordFM* Block* void */
140 /* Here's the comparison function. Since the tree is required
141 to contain non-zero sized, non-overlapping blocks, it's good
142 enough to consider any overlap as a match. */
// Orders blocks by payload range: -1 if b1 is wholly below b2, +1 if wholly
// above. The overlap case ("match", return 0) is on an elided line.
143 static Word
interval_tree_Cmp ( UWord k1
, UWord k2
)
145 Block
* b1
= (Block
*)k1
;
146 Block
* b2
= (Block
*)k2
;
147 tl_assert(b1
->req_szB
> 0);
148 tl_assert(b2
->req_szB
> 0);
149 if (b1
->payload
+ b1
->req_szB
<= b2
->payload
) return -1;
150 if (b2
->payload
+ b2
->req_szB
<= b1
->payload
) return 1;
154 // 2-entry cache for find_Block_containing
// Most-recently-used block cache (cache0 is the MRU slot) plus hit/miss
// statistics counters for the block finder.
155 static Block
* fbc_cache0
= NULL
;
156 static Block
* fbc_cache1
= NULL
;
158 static UWord stats__n_fBc_cached
= 0;
159 static UWord stats__n_fBc_uncached
= 0;
160 static UWord stats__n_fBc_notfound
= 0;
// Finds the live Block whose payload range contains address 'a', first
// checking the two-entry cache, then falling back to a lookup in the
// interval tree using a fake 1-byte probe block.
// NOTE(review): several lines are elided in this extraction (cache-hit
// returns, the 'fake'/'foundkey'/'foundval' declarations, the not-found
// return NULL, and the final cache-update/return) — verify against upstream.
162 static Block
* find_Block_containing ( Addr a
)
164 tl_assert(clo_mode
== Heap
);
166 if (LIKELY(fbc_cache0
167 && fbc_cache0
->payload
<= a
168 && a
< fbc_cache0
->payload
+ fbc_cache0
->req_szB
)) {
170 stats__n_fBc_cached
++;
173 if (LIKELY(fbc_cache1
174 && fbc_cache1
->payload
<= a
175 && a
< fbc_cache1
->payload
+ fbc_cache1
->req_szB
)) {
176 // found at 1; swap 0 and 1
177 Block
* tmp
= fbc_cache0
;
178 fbc_cache0
= fbc_cache1
;
180 stats__n_fBc_cached
++;
188 Bool found
= VG_(lookupFM
)( interval_tree
,
189 &foundkey
, &foundval
, (UWord
)&fake
);
191 stats__n_fBc_notfound
++;
194 tl_assert(foundval
== 0); // we don't store vals in the interval tree
195 tl_assert(foundkey
!= 1);
196 Block
* res
= (Block
*)foundkey
;
197 tl_assert(res
!= &fake
);
198 // put at the top position
199 fbc_cache1
= fbc_cache0
;
201 stats__n_fBc_uncached
++;
205 // delete a block; asserts if not found. (viz, 'a' must be
206 // known to be present.)
// Removes the block starting exactly at 'a' from the interval tree (via a
// fake probe block, declaration elided in this view) and invalidates the
// two-entry finder cache, which may now hold a dangling pointer.
207 static void delete_Block_starting_at ( Addr a
)
209 tl_assert(clo_mode
== Heap
);
214 Bool found
= VG_(delFromFM
)( interval_tree
,
215 NULL
, NULL
, (Addr
)&fake
);
217 fbc_cache0
= fbc_cache1
= NULL
;
220 //------------------------------------------------------------//
221 //--- a FM of allocation points (APs) ---//
222 //------------------------------------------------------------//
226 // The program point that we're summarising stats for.
// NOTE(review): the PPInfo struct's opening line and several fields (ec,
// total_blocks/bytes, curr_blocks/bytes, freed_blocks, reads/writes_bytes,
// xsize) are elided in this extraction; the comments describing them remain.
229 // Total number of blocks and bytes allocated by this PP.
233 // The current number of blocks and bytes live for this PP.
234 // Only used with clo_mode=Heap.
238 // Values at the PP max, i.e. when this PP's curr_bytes peaks.
239 // Only used with clo_mode=Heap.
240 ULong max_blocks
; // Blocks at the PP max.
241 ULong max_bytes
; // The PP max, measured in bytes.
243 // Values at the global max.
244 // Only used with clo_mode=Heap.
245 ULong at_tgmax_blocks
;
246 ULong at_tgmax_bytes
;
248 // Total lifetimes of all blocks allocated by this PP. Includes blocks
249 // explicitly freed and blocks implicitly freed at termination.
250 // Only used with clo_mode=Heap.
251 ULong total_lifetimes_instrs
;
253 // Number of blocks freed by this PP. (Only used in assertions.)
254 // Only used with clo_mode=Heap.
257 // Total number of reads and writes in all blocks allocated
258 // by this PP. Only used with clo_mode=Heap.
262 /* Histogram information. We maintain a histogram aggregated for
263 all retiring Blocks allocated by this PP, but only if:
264 - this PP has only ever allocated objects of one size
265 - that size is <= HISTOGRAM_SIZE_LIMIT
266 What we need therefore is a mechanism to see if this PP
267 has only ever allocated blocks of one size.
270 Unknown because no retirement yet
271 Exactly xsize all retiring blocks are of this size
272 Mixed multiple different sizes seen
274 Only used with clo_mode=Heap.
276 enum { Unknown
=999, Exactly
, Mixed
} xsize_tag
;
278 UInt
* histo
; /* [0 .. xsize-1] */
282 /* maps ExeContext*'s to PPInfo*'s. Note that the keys must match the
283 .ec field in the values. */
284 static WordFM
* ppinfo
= NULL
; /* WordFM* ExeContext* PPInfo* */
286 // Are we at peak memory? If so, update at_tgmax_blocks and at_tgmax_bytes in
287 // all PPInfos. Note that this is moderately expensive so we avoid calling it
288 // on every allocation.
// Called from paths where g_curr_bytes may be about to drop; if the current
// total equals the recorded max, snapshot every PP's current counts as its
// at-global-max values. (keyW/valW declarations are elided in this view.)
289 static void check_for_peak(void)
291 tl_assert(clo_mode
== Heap
);
293 if (g_curr_bytes
== g_max_bytes
) {
294 // It's a peak. (If there are multiple equal peaks we record the latest
297 VG_(initIterFM
)(ppinfo
);
298 while (VG_(nextIterFM
)(ppinfo
, &keyW
, &valW
)) {
299 PPInfo
* ppi
= (PPInfo
*)valW
;
300 tl_assert(ppi
&& ppi
->ec
== (ExeContext
*)keyW
);
301 ppi
->at_tgmax_blocks
= ppi
->curr_blocks
;
302 ppi
->at_tgmax_bytes
= ppi
->curr_bytes
;
304 VG_(doneIterFM
)(ppinfo
);
308 /* 'bk' is being introduced (has just been allocated). Find the
309 relevant PPInfo entry for it, or create one, based on the block's
310 allocation EC. Then, update the PPInfo to the extent that we
311 actually can, to reflect the allocation. */
// Looks up (or lazily creates and registers) the PPInfo keyed by bk->ec,
// then bumps total counters, and in Heap mode the current/live counters,
// updating the global and per-PP maxima when a new peak is reached.
// NOTE(review): keyW/valW/ppi declarations and some branch lines (the found/
// not-found split, assertions on 'present') are elided in this extraction.
312 static void intro_Block(Block
* bk
)
320 Bool found
= VG_(lookupFM
)( ppinfo
,
321 &keyW
, &valW
, (UWord
)bk
->ec
);
324 tl_assert(keyW
== (UWord
)bk
->ec
);
326 ppi
= VG_(malloc
)( "dh.intro_Block.1", sizeof(PPInfo
) );
327 VG_(memset
)(ppi
, 0, sizeof(*ppi
));
329 Bool present
= VG_(addToFM
)( ppinfo
,
330 (UWord
)bk
->ec
, (UWord
)ppi
);
332 if (clo_mode
== Heap
) {
334 tl_assert(ppi
->freed_blocks
== 0);
335 ppi
->xsize_tag
= Unknown
;
337 if (0) VG_(printf
)("ppi %p --> Unknown\n", ppi
);
341 tl_assert(ppi
->ec
== bk
->ec
);
343 // Update global stats and PPInfo stats.
346 g_total_bytes
+= bk
->req_szB
;
349 ppi
->total_bytes
+= bk
->req_szB
;
351 if (clo_mode
== Heap
) {
353 g_curr_bytes
+= bk
->req_szB
;
356 ppi
->curr_bytes
+= bk
->req_szB
;
358 // The use of `>=` rather than `>` means that if there are multiple equal
359 // peaks we record the latest one, like `check_for_peak` does.
360 if (g_curr_bytes
>= g_max_bytes
) {
361 g_max_blocks
= g_curr_blocks
;
362 g_max_bytes
= g_curr_bytes
;
363 g_tgmax_instrs
= g_curr_instrs
;
365 ppi
->max_blocks
= ppi
->curr_blocks
;
366 ppi
->max_bytes
= ppi
->curr_bytes
;
371 /* 'bk' is retiring (being freed). Find the relevant PPInfo entry for
372 it, which must already exist. Then, fold info from 'bk' into that
373 entry. 'because_freed' is True if the block is retiring because
374 the client has freed it. If it is False then the block is retiring
375 because the program has finished, in which case we want to skip the
376 updates of the total blocks live etc for this PP, but still fold in
377 the access counts and histo data that have so far accumulated for
// Folds a dying block's lifetime, access counts and (when sizes at this PP
// have been uniform) its per-byte histogram into the PP's aggregate state,
// driving the Unknown -> Exactly -> Mixed size-tag state machine.
// NOTE(review): many lines are elided in this extraction (keyW/valW/ppi
// declarations, check_for_peak() call, the 'because_freed' guards, switch
// case labels and closing braces) — compare with upstream before editing.
379 static void retire_Block(Block
* bk
, Bool because_freed
)
381 tl_assert(clo_mode
== Heap
);
388 Bool found
= VG_(lookupFM
)( ppinfo
,
389 &keyW
, &valW
, (UWord
)bk
->ec
);
392 tl_assert(ppi
->ec
== bk
->ec
);
394 // update stats following this free.
396 VG_(printf
)("ec %p ppi->c_by_l %llu bk->rszB %llu\n",
397 bk
->ec
, ppi
->curr_bytes
, (ULong
)bk
->req_szB
);
400 // Total bytes is coming down from a possible peak.
403 // Then update global stats.
404 tl_assert(g_curr_blocks
>= 1);
405 tl_assert(g_curr_bytes
>= bk
->req_szB
);
407 g_curr_bytes
-= bk
->req_szB
;
409 // Then update PPInfo stats.
410 tl_assert(ppi
->curr_blocks
>= 1);
411 tl_assert(ppi
->curr_bytes
>= bk
->req_szB
);
413 ppi
->curr_bytes
-= bk
->req_szB
;
418 tl_assert(bk
->allocd_at
<= g_curr_instrs
);
419 ppi
->total_lifetimes_instrs
+= (g_curr_instrs
- bk
->allocd_at
);
422 ppi
->reads_bytes
+= bk
->reads_bytes
;
423 ppi
->writes_bytes
+= bk
->writes_bytes
;
424 g_reads_bytes
+= bk
->reads_bytes
;
425 g_writes_bytes
+= bk
->writes_bytes
;
427 // histo stuff. First, do state transitions for xsize/xsize_tag.
428 switch (ppi
->xsize_tag
) {
431 tl_assert(ppi
->xsize
== 0);
432 tl_assert(ppi
->freed_blocks
== 1 || ppi
->freed_blocks
== 0);
433 tl_assert(!ppi
->histo
);
434 ppi
->xsize_tag
= Exactly
;
435 ppi
->xsize
= bk
->req_szB
;
436 if (0) VG_(printf
)("ppi %p --> Exactly(%lu)\n", ppi
, ppi
->xsize
);
437 // and allocate the histo
439 ppi
->histo
= VG_(malloc
)("dh.retire_Block.1",
440 ppi
->xsize
* sizeof(UInt
));
441 VG_(memset
)(ppi
->histo
, 0, ppi
->xsize
* sizeof(UInt
));
446 //tl_assert(ppi->freed_blocks > 1);
447 if (bk
->req_szB
!= ppi
->xsize
) {
448 if (0) VG_(printf
)("ppi %p --> Mixed(%lu -> %lu)\n",
449 ppi
, ppi
->xsize
, bk
->req_szB
);
450 ppi
->xsize_tag
= Mixed
;
452 // deallocate the histo, if any
454 VG_(free
)(ppi
->histo
);
461 //tl_assert(ppi->freed_blocks > 1);
468 // See if we can fold the histo data from this block into
469 // the data for the PP.
470 if (ppi
->xsize_tag
== Exactly
&& ppi
->histo
&& bk
->histoW
) {
471 tl_assert(ppi
->xsize
== bk
->req_szB
);
473 for (i
= 0; i
< ppi
->xsize
; i
++) {
474 // FIXME: do something better in case of overflow of ppi->histo[..]
475 // Right now, at least don't let it overflow/wrap around
476 if (ppi
->histo
[i
] <= 0xFFFE0000)
477 ppi
->histo
[i
] += (UInt
)bk
->histoW
[i
];
479 if (0) VG_(printf
)("fold in, PP = %p\n", ppi
);
484 VG_(printf
)("block retiring, histo %lu: ", bk
->req_szB
);
486 for (i
= 0; i
< bk
->req_szB
; i
++)
487 VG_(printf
)("%u ", (UInt
)bk
->histoB
[i
]);
490 VG_(printf
)("block retiring, no histo %lu\n", bk
->req_szB
);
495 /* This handles block resizing. When a block with PP 'ec' has a
496 size change of 'delta', call here to update the PPInfo. */
// Adjusts global and per-PP current byte counts by the signed size delta,
// counts the realloc as a whole new allocation in the totals (see comment
// below for the rationale), and re-checks the peak after the change.
// NOTE(review): keyW/valW/ppi declarations, the found-assertion, the
// check_for_peak() call and closing braces are elided in this extraction.
497 static void resize_Block(ExeContext
* ec
, SizeT old_req_szB
, SizeT new_req_szB
)
499 tl_assert(clo_mode
== Heap
);
501 Long delta
= (Long
)new_req_szB
- (Long
)old_req_szB
;
505 Bool found
= VG_(lookupFM
)( ppinfo
,
506 &keyW
, &valW
, (UWord
)ec
);
510 tl_assert(ppi
->ec
== ec
);
513 tl_assert(ppi
->curr_bytes
>= -delta
);
514 tl_assert(g_curr_bytes
>= -delta
);
516 // Total bytes might be coming down from a possible peak.
520 // Note: we treat realloc() like malloc() + free() for total counts, i.e. we
521 // increment total_blocks by 1 and increment total_bytes by new_req_szB.
523 // A reasonable alternative would be to leave total_blocks unchanged and
524 // increment total_bytes by delta (but only if delta is positive). But then
525 // calls to realloc wouldn't be counted towards the total_blocks count,
526 // which is undesirable.
528 // Update global stats and PPInfo stats.
531 g_total_bytes
+= new_req_szB
;
534 ppi
->total_bytes
+= new_req_szB
;
536 g_curr_blocks
+= 0; // unchanged
537 g_curr_bytes
+= delta
;
539 ppi
->curr_blocks
+= 0; // unchanged
540 ppi
->curr_bytes
+= delta
;
542 // The use of `>=` rather than `>` means that if there are multiple equal
543 // peaks we record the latest one, like `check_for_peak` does.
544 if (g_curr_bytes
>= g_max_bytes
) {
545 g_max_blocks
= g_curr_blocks
;
546 g_max_bytes
= g_curr_bytes
;
547 g_tgmax_instrs
= g_curr_instrs
;
549 ppi
->max_blocks
= ppi
->curr_blocks
;
550 ppi
->max_bytes
= ppi
->curr_bytes
;
554 //------------------------------------------------------------//
555 //--- update both Block and PPInfos after {m,re}alloc/free ---//
556 //------------------------------------------------------------//
// Common allocation path for all malloc-family wrappers: performs the client
// allocation (zeroing if requested), and in Heap mode creates a Block record,
// attaches a per-byte access histogram for small blocks, inserts it into the
// interval tree, and invalidates the finder cache.
// NOTE(review): the trailing 'is_zeroed' parameter, opening brace, the
// zero-size check preceding 'req_szB = 1', reads_bytes init, intro_Block()
// call and the final return are on elided lines in this extraction.
559 void* new_block ( ThreadId tid
, void* p
, SizeT req_szB
, SizeT req_alignB
,
562 tl_assert(p
== NULL
); // don't handle custom allocators right now
565 if ((SSizeT
)req_szB
< 0) return NULL
;
568 req_szB
= 1; /* can't allow zero-sized blocks in the interval tree */
571 // Allocate and zero if necessary
573 p
= VG_(cli_malloc
)( req_alignB
, req_szB
);
577 if (is_zeroed
) VG_(memset
)(p
, 0, req_szB
);
578 actual_szB
= VG_(cli_malloc_usable_size
)(p
);
579 tl_assert(actual_szB
>= req_szB
);
582 if (clo_mode
!= Heap
) {
586 // Make new Block, add to interval_tree.
587 Block
* bk
= VG_(malloc
)("dh.new_block.1", sizeof(Block
));
588 bk
->payload
= (Addr
)p
;
589 bk
->req_szB
= req_szB
;
590 bk
->ec
= VG_(record_ExeContext
)(tid
, 0/*first word delta*/);
591 bk
->allocd_at
= g_curr_instrs
;
593 bk
->writes_bytes
= 0;
594 // Set up histogram array, if the block isn't too large.
596 if (req_szB
<= HISTOGRAM_SIZE_LIMIT
) {
597 bk
->histoW
= VG_(malloc
)("dh.new_block.2", req_szB
* sizeof(UShort
));
598 VG_(memset
)(bk
->histoW
, 0, req_szB
* sizeof(UShort
));
601 Bool present
= VG_(addToFM
)( interval_tree
, (UWord
)bk
, (UWord
)0/*no val*/);
603 fbc_cache0
= fbc_cache1
= NULL
;
// Common free path: client-frees 'p' (elided line), and in Heap mode locates
// the owning Block, tolerates bogus frees (pointer not found, or pointing
// into the middle of a block), retires the block's stats, removes it from
// the interval tree and frees its histogram and the Block itself.
// NOTE(review): the VG_(cli_free) call, the '!bk' test before the first
// 'bogus free' return, and the final VG_(free)(bk) are on elided lines.
611 void die_block ( void* p
)
615 if (clo_mode
!= Heap
) {
619 Block
* bk
= find_Block_containing( (Addr
)p
);
621 return; // bogus free
624 tl_assert(bk
->req_szB
> 0);
625 // assert the block finder is behaving sanely
626 tl_assert(bk
->payload
<= (Addr
)p
);
627 tl_assert( (Addr
)p
< bk
->payload
+ bk
->req_szB
);
629 if (bk
->payload
!= (Addr
)p
) {
630 return; // bogus free
633 retire_Block(bk
, True
/*because_freed*/);
635 delete_Block_starting_at( bk
->payload
);
637 VG_(free
)( bk
->histoW
);
// Common realloc path. In non-Heap modes it just reallocates and copies.
// In Heap mode it finds the old Block, discards its histogram (too hard to
// keep meaningful across a resize), then either shrinks in place or moves to
// a fresh allocation — updating reads/writes counters as if the implicit
// copy happened, resizing the PPInfo stats, and re-inserting the moved block
// into the interval tree with the finder cache invalidated.
// NOTE(review): p_new declaration, several returns, the '!bk' check, the
// histoW=NULL reset, allocd_at update and closing braces sit on elided lines
// in this extraction — compare with upstream before editing.
644 void* renew_block ( ThreadId tid
, void* p_old
, SizeT new_req_szB
)
648 tl_assert(new_req_szB
> 0); // map 0 to 1
650 if (clo_mode
!= Heap
) {
651 SizeT old_actual_szB
= VG_(cli_malloc_usable_size
)(p_old
);
652 p_new
= VG_(cli_malloc
)(VG_(clo_alignment
), new_req_szB
);
656 VG_(memmove
)(p_new
, p_old
, VG_MIN(old_actual_szB
, new_req_szB
));
657 VG_(cli_free
)(p_old
);
661 // Find the old block.
662 Block
* bk
= find_Block_containing( (Addr
)p_old
);
664 return NULL
; // bogus realloc
667 tl_assert(bk
->req_szB
> 0);
668 // Assert the block finder is behaving sanely.
669 tl_assert(bk
->payload
<= (Addr
)p_old
);
670 tl_assert( (Addr
)p_old
< bk
->payload
+ bk
->req_szB
);
672 if (bk
->payload
!= (Addr
)p_old
) {
673 return NULL
; // bogus realloc
676 // Keeping the histogram alive in any meaningful way across
677 // block resizing is too darn complicated. Just throw it away.
679 VG_(free
)(bk
->histoW
);
683 // Actually do the allocation, if necessary.
684 if (new_req_szB
<= bk
->req_szB
) {
685 // New size is smaller or same; block not moved.
686 resize_Block(bk
->ec
, bk
->req_szB
, new_req_szB
);
687 bk
->req_szB
= new_req_szB
;
689 // Update reads/writes for the implicit copy. Even though we didn't
690 // actually do a copy, we act like we did, to match up with the fact
691 // that we treat this as an additional allocation.
692 bk
->reads_bytes
+= new_req_szB
;
693 bk
->writes_bytes
+= new_req_szB
;
698 // New size is bigger; make new block, copy shared contents, free old.
699 p_new
= VG_(cli_malloc
)(VG_(clo_alignment
), new_req_szB
);
701 // Nb: if realloc fails, NULL is returned but the old block is not
702 // touched. What an awful function.
705 tl_assert(p_new
!= p_old
);
707 VG_(memcpy
)(p_new
, p_old
, bk
->req_szB
);
708 VG_(cli_free
)(p_old
);
710 // Since the block has moved, we need to re-insert it into the
711 // interval tree at the new place. Do this by removing
713 delete_Block_starting_at( (Addr
)p_old
);
714 // Now 'bk' is no longer in the tree, but the Block itself
717 // Update reads/writes for the copy.
718 bk
->reads_bytes
+= bk
->req_szB
;
719 bk
->writes_bytes
+= bk
->req_szB
;
721 // Update the metadata.
722 resize_Block(bk
->ec
, bk
->req_szB
, new_req_szB
);
723 bk
->payload
= (Addr
)p_new
;
724 bk
->req_szB
= new_req_szB
;
726 // And re-add it to the interval tree.
728 = VG_(addToFM
)( interval_tree
, (UWord
)bk
, (UWord
)0/*no val*/);
730 fbc_cache0
= fbc_cache1
= NULL
;
736 //------------------------------------------------------------//
737 //--- malloc() et al replacement wrappers ---//
738 //------------------------------------------------------------//
// Thin wrappers registered as the tool's malloc-replacement entry points.
// All allocation variants funnel into new_block(); the free/delete variants'
// die_block() bodies are on elided lines in this extraction.
740 static void* dh_malloc ( ThreadId tid
, SizeT szB
)
742 return new_block( tid
, NULL
, szB
, VG_(clo_alignment
), /*is_zeroed*/False
);
745 static void* dh___builtin_new ( ThreadId tid
, SizeT szB
)
747 return new_block( tid
, NULL
, szB
, VG_(clo_alignment
), /*is_zeroed*/False
);
750 static void* dh___builtin_new_aligned ( ThreadId tid
, SizeT szB
, SizeT alignB
)
752 return new_block( tid
, NULL
, szB
, alignB
, /*is_zeroed*/False
);
755 static void* dh___builtin_vec_new ( ThreadId tid
, SizeT szB
)
757 return new_block( tid
, NULL
, szB
, VG_(clo_alignment
), /*is_zeroed*/False
);
760 static void* dh___builtin_vec_new_aligned ( ThreadId tid
, SizeT szB
, SizeT alignB
)
762 return new_block( tid
, NULL
, szB
, alignB
, /*is_zeroed*/False
);
// NOTE(review): m*szB below has no overflow check — a huge m and szB can
// wrap. Worth confirming whether upstream guards this elsewhere.
765 static void* dh_calloc ( ThreadId tid
, SizeT m
, SizeT szB
)
767 return new_block( tid
, NULL
, m
*szB
, VG_(clo_alignment
), /*is_zeroed*/True
);
770 static void *dh_memalign ( ThreadId tid
, SizeT alignB
, SizeT szB
)
772 return new_block( tid
, NULL
, szB
, alignB
, False
);
775 static void dh_free ( ThreadId tid
__attribute__((unused
)), void* p
)
780 static void dh___builtin_delete ( ThreadId tid
, void* p
)
785 static void dh___builtin_delete_aligned ( ThreadId tid
, void* p
, SizeT align
)
790 static void dh___builtin_vec_delete ( ThreadId tid
, void* p
)
795 static void dh___builtin_vec_delete_aligned ( ThreadId tid
, void* p
, SizeT align
)
// realloc: a NULL p_old degenerates to malloc (the guard line is elided);
// otherwise renew_block() does the work.
800 static void* dh_realloc ( ThreadId tid
, void* p_old
, SizeT new_szB
)
803 return dh_malloc(tid
, new_szB
);
809 return renew_block(tid
, p_old
, new_szB
);
// Reports the requested size for tracked blocks; 0 for unknown pointers.
812 static SizeT
dh_malloc_usable_size ( ThreadId tid
, void* p
)
814 if (clo_mode
!= Heap
) {
815 return VG_(cli_malloc_usable_size
)(p
);
818 Block
* bk
= find_Block_containing( (Addr
)p
);
819 return bk
? bk
->req_szB
: 0;
822 //------------------------------------------------------------//
823 //--- memory references ---//
824 //------------------------------------------------------------//
// Bumps the per-byte access histogram of 'bk' for the [addr, addr+szB)
// range, clamped to the block's extent. The loop body that saturates each
// counter (latching at 0xFFFF per the Block comment) is on elided lines.
827 void inc_histo_for_block ( Block
* bk
, Addr addr
, UWord szB
)
829 UWord i
, offMin
, offMax1
;
830 offMin
= addr
- bk
->payload
;
831 tl_assert(offMin
< bk
->req_szB
);
832 offMax1
= offMin
+ szB
;
833 if (offMax1
> bk
->req_szB
)
834 offMax1
= bk
->req_szB
;
835 //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB);
836 for (i
= offMin
; i
< offMax1
; i
++) {
837 UShort n
= bk
->histoW
[i
];
// Dirty-helper callbacks invoked from instrumented code for each memory
// access that might touch the heap. Both look the block up, accumulate the
// byte count, and update the histogram when the block has one (the '!bk' and
// 'bk->histoW' guards are on elided lines in this extraction).
844 void dh_handle_write ( Addr addr
, UWord szB
)
846 tl_assert(clo_mode
== Heap
);
848 Block
* bk
= find_Block_containing(addr
);
850 bk
->writes_bytes
+= szB
;
852 inc_histo_for_block(bk
, addr
, szB
);
857 void dh_handle_read ( Addr addr
, UWord szB
)
859 tl_assert(clo_mode
== Heap
);
861 Block
* bk
= find_Block_containing(addr
);
863 bk
->reads_bytes
+= szB
;
865 inc_histo_for_block(bk
, addr
, szB
);
869 // Handle reads and writes by syscalls (read == kernel
870 // reads user space, write == kernel writes user space).
871 // Assumes no such read or write spans a heap block
872 // boundary and so we can treat it just as one giant
// Non-instruction access hooks: dispatch on the CorePart (switch header and
// other case bodies are on elided lines) and forward heap-relevant accesses
// to dh_handle_read/dh_handle_write.
875 void dh_handle_noninsn_read ( CorePart part
, ThreadId tid
, const HChar
* s
,
876 Addr base
, SizeT size
)
878 tl_assert(clo_mode
== Heap
);
882 dh_handle_read(base
, size
);
884 case Vg_CoreSysCallArgInMem
:
886 case Vg_CoreTranslate
:
// NOTE(review): as extracted, the strlen below reads from str+1, i.e.
// length-1 of the string rather than length+1 including the terminator —
// verify against upstream whether the intent is VG_(strlen)(str)+1.
894 void dh_handle_noninsn_read_asciiz(CorePart part
, ThreadId tid
, const HChar
* s
,
897 tl_assert(clo_mode
== Heap
);
899 tl_assert(part
== Vg_CoreSysCall
);
900 dh_handle_noninsn_read(part
, tid
, s
, str
, VG_(strlen
)((const HChar
*)str
+1));
904 void dh_handle_noninsn_write ( CorePart part
, ThreadId tid
,
905 Addr base
, SizeT size
)
907 tl_assert(clo_mode
== Heap
);
911 case Vg_CoreClientReq
:
912 dh_handle_write(base
, size
);
921 //------------------------------------------------------------//
922 //--- Instrumentation ---//
923 //------------------------------------------------------------//
// Shorthand constructors for VEX IR expression/statement nodes, used by the
// instrumentation code below.
925 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
926 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
927 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
928 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
929 #define assign(_t, _e) IRStmt_WrTmp((_t), (_e))
// Emits IR into 'sbOut' that increments the 64-bit g_curr_instrs counter by
// 'n' via a load/add/store triple at the counter's host address. 'END' is the
// host endianness selected by the #if block (its definitions are on elided
// lines in this extraction).
932 void add_counter_update(IRSB
* sbOut
, Int n
)
934 #if defined(VG_BIGENDIAN)
936 #elif defined(VG_LITTLEENDIAN)
939 # error "Unknown endianness"
941 // Add code to increment 'g_curr_instrs' by 'n', like this:
942 // WrTmp(t1, Load64(&g_curr_instrs))
943 // WrTmp(t2, Add64(RdTmp(t1), Const(n)))
944 // Store(&g_curr_instrs, t2)
945 IRTemp t1
= newIRTemp(sbOut
->tyenv
, Ity_I64
);
946 IRTemp t2
= newIRTemp(sbOut
->tyenv
, Ity_I64
);
947 IRExpr
* counter_addr
= mkIRExpr_HWord( (HWord
)&g_curr_instrs
);
949 IRStmt
* st1
= assign(t1
, IRExpr_Load(END
, Ity_I64
, counter_addr
));
950 IRStmt
* st2
= assign(t2
, binop(Iop_Add64
, mkexpr(t1
), mkU64(n
)));
951 IRStmt
* st3
= IRStmt_Store(END
, counter_addr
, mkexpr(t2
));
953 addStmtToIRSB( sbOut
, st1
);
954 addStmtToIRSB( sbOut
, st2
);
955 addStmtToIRSB( sbOut
, st3
);
// Emits a guarded dirty-helper call (dh_handle_read or dh_handle_write) for
// one memory access of 'szB' bytes at 'addr'. The guard skips the call when
// the address is within THRESH bytes above (SP - redzone), i.e. almost
// certainly a stack access that cannot be a heap block.
// NOTE(review): the trailing 'goff_sp' parameter, the isWrite if/else
// headers, 'hAddr'/'di' declarations and several assign(...) statement
// wrappers around the ?: expressions are on elided lines in this extraction.
959 void addMemEvent(IRSB
* sbOut
, Bool isWrite
, Int szB
, IRExpr
* addr
,
962 if (clo_mode
!= Heap
) {
966 IRType tyAddr
= Ity_INVALID
;
967 const HChar
* hName
= NULL
;
969 IRExpr
** argv
= NULL
;
972 const Int THRESH
= 4096 * 4; // somewhat arbitrary
973 const Int rz_szB
= VG_STACK_REDZONE_SZB
;
975 tyAddr
= typeOfIRExpr( sbOut
->tyenv
, addr
);
976 tl_assert(tyAddr
== Ity_I32
|| tyAddr
== Ity_I64
);
979 hName
= "dh_handle_write";
980 hAddr
= &dh_handle_write
;
982 hName
= "dh_handle_read";
983 hAddr
= &dh_handle_read
;
986 argv
= mkIRExprVec_2( addr
, mkIRExpr_HWord(szB
) );
988 /* Add the helper. */
992 di
= unsafeIRDirty_0_N( 2/*regparms*/,
993 hName
, VG_(fnptr_to_fnentry
)( hAddr
),
996 /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
997 some arbitrary N. If that fails then addr is in the range (SP -
998 RZ .. SP + N - RZ). If N is smallish (a page?) then we can say
999 addr is within a page of SP and so can't possibly be a heap
1000 access, and so can be skipped. */
1001 IRTemp sp
= newIRTemp(sbOut
->tyenv
, tyAddr
);
1002 addStmtToIRSB( sbOut
, assign(sp
, IRExpr_Get(goff_sp
, tyAddr
)));
1004 IRTemp sp_minus_rz
= newIRTemp(sbOut
->tyenv
, tyAddr
);
1009 ? binop(Iop_Sub32
, mkexpr(sp
), mkU32(rz_szB
))
1010 : binop(Iop_Sub64
, mkexpr(sp
), mkU64(rz_szB
)))
1013 IRTemp diff
= newIRTemp(sbOut
->tyenv
, tyAddr
);
1018 ? binop(Iop_Sub32
, addr
, mkexpr(sp_minus_rz
))
1019 : binop(Iop_Sub64
, addr
, mkexpr(sp_minus_rz
)))
1022 IRTemp guard
= newIRTemp(sbOut
->tyenv
, Ity_I1
);
1027 ? binop(Iop_CmpLT32U
, mkU32(THRESH
), mkexpr(diff
))
1028 : binop(Iop_CmpLT64U
, mkU64(THRESH
), mkexpr(diff
)))
1030 di
->guard
= mkexpr(guard
);
1032 addStmtToIRSB( sbOut
, IRStmt_Dirty(di
) );
// The tool's VEX instrumentation pass: copies the superblock, counts guest
// instructions (incrementing g_curr_instrs before each Exit and at SB end),
// and plants addMemEvent() calls for every Load, Store, memory-touching
// Dirty helper, CAS (treated as read+write) and LL/SC statement.
// NOTE(review): this extraction elides the 'static' qualifier context,
// sbOut/i/n declarations, the switch-on-st->tag skeleton with its case
// labels, Ist_IMark/Ist_Exit handling, and closing braces/return — treat the
// visible lines as a skeleton and compare with upstream before editing.
1036 IRSB
* dh_instrument ( VgCallbackClosure
* closure
,
1038 const VexGuestLayout
* layout
,
1039 const VexGuestExtents
* vge
,
1040 const VexArchInfo
* archinfo_host
,
1041 IRType gWordTy
, IRType hWordTy
)
1045 IRTypeEnv
* tyenv
= sbIn
->tyenv
;
1047 const Int goff_sp
= layout
->offset_SP
;
1049 // We increment the instruction count in two places:
1050 // - just before any Ist_Exit statements;
1051 // - just before the IRSB's end.
1052 // In the former case, we zero 'n' and then continue instrumenting.
1054 sbOut
= deepCopyIRSBExceptStmts(sbIn
);
1056 // Copy verbatim any IR preamble preceding the first IMark
1058 while (i
< sbIn
->stmts_used
&& sbIn
->stmts
[i
]->tag
!= Ist_IMark
) {
1059 addStmtToIRSB( sbOut
, sbIn
->stmts
[i
] );
1063 for (/*use current i*/; i
< sbIn
->stmts_used
; i
++) {
1064 IRStmt
* st
= sbIn
->stmts
[i
];
1066 if (!st
|| st
->tag
== Ist_NoOp
) continue;
1077 // Add an increment before the Exit statement, then reset 'n'.
1078 add_counter_update(sbOut
, n
);
1085 IRExpr
* data
= st
->Ist
.WrTmp
.data
;
1086 if (data
->tag
== Iex_Load
) {
1087 IRExpr
* aexpr
= data
->Iex
.Load
.addr
;
1088 // Note also, endianness info is ignored. I guess
1089 // that's not interesting.
1090 addMemEvent( sbOut
, False
/*!isWrite*/,
1091 sizeofIRType(data
->Iex
.Load
.ty
),
1098 IRExpr
* data
= st
->Ist
.Store
.data
;
1099 IRExpr
* aexpr
= st
->Ist
.Store
.addr
;
1100 addMemEvent( sbOut
, True
/*isWrite*/,
1101 sizeofIRType(typeOfIRExpr(tyenv
, data
)),
1108 IRDirty
* d
= st
->Ist
.Dirty
.details
;
1109 if (d
->mFx
!= Ifx_None
) {
1110 /* This dirty helper accesses memory. Collect the details. */
1111 tl_assert(d
->mAddr
!= NULL
);
1112 tl_assert(d
->mSize
!= 0);
1113 dataSize
= d
->mSize
;
1114 // Large (eg. 28B, 108B, 512B on x86) data-sized
1115 // instructions will be done inaccurately, but they're
1116 // very rare and this avoids errors from hitting more
1117 // than two cache lines in the simulation.
1118 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
)
1119 addMemEvent( sbOut
, False
/*!isWrite*/,
1120 dataSize
, d
->mAddr
, goff_sp
);
1121 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
)
1122 addMemEvent( sbOut
, True
/*isWrite*/,
1123 dataSize
, d
->mAddr
, goff_sp
);
1125 tl_assert(d
->mAddr
== NULL
);
1126 tl_assert(d
->mSize
== 0);
1132 /* We treat it as a read and a write of the location. I
1133 think that is the same behaviour as it was before IRCAS
1134 was introduced, since prior to that point, the Vex
1135 front ends would translate a lock-prefixed instruction
1136 into a (normal) read followed by a (normal) write. */
1138 IRCAS
* cas
= st
->Ist
.CAS
.details
;
1139 tl_assert(cas
->addr
!= NULL
);
1140 tl_assert(cas
->dataLo
!= NULL
);
1141 dataSize
= sizeofIRType(typeOfIRExpr(tyenv
, cas
->dataLo
));
1142 if (cas
->dataHi
!= NULL
)
1143 dataSize
*= 2; /* since it's a doubleword-CAS */
1144 addMemEvent( sbOut
, False
/*!isWrite*/,
1145 dataSize
, cas
->addr
, goff_sp
);
1146 addMemEvent( sbOut
, True
/*isWrite*/,
1147 dataSize
, cas
->addr
, goff_sp
);
1153 if (st
->Ist
.LLSC
.storedata
== NULL
) {
1155 dataTy
= typeOfIRTemp(tyenv
, st
->Ist
.LLSC
.result
);
1156 addMemEvent( sbOut
, False
/*!isWrite*/,
1157 sizeofIRType(dataTy
),
1158 st
->Ist
.LLSC
.addr
, goff_sp
);
1161 dataTy
= typeOfIRExpr(tyenv
, st
->Ist
.LLSC
.storedata
);
1162 addMemEvent( sbOut
, True
/*isWrite*/,
1163 sizeofIRType(dataTy
),
1164 st
->Ist
.LLSC
.addr
, goff_sp
);
1173 addStmtToIRSB( sbOut
, st
);
1177 // Add an increment before the SB end.
1178 add_counter_update(sbOut
, n
);
1189 //------------------------------------------------------------//
1190 //--- Client requests ---//
1191 //------------------------------------------------------------//
// Handles DHAT-specific client requests: AD_HOC_EVENT (in ad-hoc mode) and
// COPY (in copy mode) both synthesise a stack Block with just an ExeContext
// and a size, then feed it to intro_Block() so the event is attributed to
// the requesting call stack.
// NOTE(review): the switch header, mode-mismatch warnings/returns, the
// 'Block bk;' declarations, 'bk.req_szB = len;' assignments, intro_Block()
// calls and the default-case warning tail are on elided lines here.
1193 static Bool
dh_handle_client_request(ThreadId tid
, UWord
* arg
, UWord
* ret
)
1196 case VG_USERREQ__DHAT_AD_HOC_EVENT
: {
1197 if (clo_mode
!= AdHoc
) {
1201 SizeT len
= (SizeT
)arg
[1];
1203 // Only the ec and req_szB fields are used by intro_Block().
1205 VG_(memset
)(&bk
, 0, sizeof(bk
));
1207 bk
.ec
= VG_(record_ExeContext
)(tid
, 0/*first word delta*/);
1214 case _VG_USERREQ__DHAT_COPY
: {
1215 SizeT len
= (SizeT
)arg
[1];
1217 if (clo_mode
!= Copy
) {
1221 // Only the ec and req_szB fields are used by intro_Block().
1223 VG_(memset
)(&bk
, 0, sizeof(bk
));
1225 bk
.ec
= VG_(record_ExeContext
)(tid
, 0/*first word delta*/);
1235 "Warning: unknown DHAT client request code %llx\n",
1242 //------------------------------------------------------------//
1243 //--- Finalisation ---//
1244 //------------------------------------------------------------//
1246 // File format notes.
1248 // - The files are JSON, because it's a widely-used format and saves us having
1249 // to write a parser in dh_view.js.
1251 // - We use a comma-first style for the generated JSON. Comma-first style
1252 // moves the special case for arrays/objects from the last item to the
1253 // first. This helps in cases where you can't easily tell in advance the
1254 // size of arrays/objects, such as iterating over a WordFM (because
1255 // VG_(sizeFM) is O(n) rather than O(1)), and iterating over stack frames
1256 // using VG_(apply_ExeContext) in combination with an InlIpCursor.
1258 // - We use short field names and minimal whitespace to minimize file sizes.
1263 // // Version number of the format. Incremented on each
1264 // // backwards-incompatible change. A mandatory integer.
1265 // "dhatFileVersion": 2,
1267 // // The invocation mode. A mandatory, free-form string.
1270 // // The verb used before above stack frames, i.e. "<verb> at {". A
1271 // // mandatory string.
1272 // "verb": "Allocated",
1274 // // Are block lifetimes recorded? Affects whether some other fields are
1275 // // present. A mandatory boolean.
1278 // // Are block accesses recorded? Affects whether some other fields are
1279 // // present. A mandatory boolean.
1282 // // Byte/bytes/blocks-position units. Optional strings. "byte", "bytes",
1283 // // and "blocks" are the values used if these fields are omitted.
1284 // "bu": "byte", "bsu": "bytes", "bksu": "blocks",
1286 // // Time units (individual and 1,000,000x). Mandatory strings.
1287 // "tu": "instrs", "Mtu": "Minstr"
1289 //   // The "short-lived" time threshold, measured in "tu"s.
1290 // // - bklt=true: a mandatory integer.
1291 // // - bklt=false: omitted.
1294 // // The executed command. A mandatory string.
1297 // // The process ID. A mandatory integer.
1300 // // The time at the end of execution (t-end). A mandatory integer.
1303 // // The time of the global max (t-gmax).
1304 // // - bklt=true: a mandatory integer.
1305 // // - bklt=false: omitted.
1308 // // The program points. A mandatory array.
1311 // // Total bytes and blocks. Mandatory integers.
1312 // "tb": 5, "tbk": 1,
1314 // // Total lifetimes of all blocks allocated at this PP.
1315 // // - bklt=true: a mandatory integer.
1316 // // - bklt=false: omitted.
1319 // // The maximum bytes and blocks for this PP.
1320 // // - bklt=true: mandatory integers.
1321 // // - bklt=false: omitted.
1322 // "mb": 5, "mbk": 1,
1324 // // The bytes and blocks at t-gmax for this PP.
1325 // // - bklt=true: mandatory integers.
1326 // // - bklt=false: omitted.
1327 // "gb": 0, "gbk": 0,
1329 // // The bytes and blocks at t-end for this PP.
1330 // // - bklt=true: mandatory integers.
1331 // // - bklt=false: omitted.
1332 // "eb": 0, "ebk": 0,
1334 // // The reads and writes of blocks for this PP.
1335 // // - bkacc=true: mandatory integers.
1336 // // - bkacc=false: omitted.
1337 // "rb": 41, "wb": 5,
1339 // // The exact accesses of blocks for this PP. Only used when all
1340 // // allocations are the same size and sufficiently small. A negative
1341 // // element indicates run-length encoding of the following integer.
1342 // // E.g. `-3, 4` means "three 4s in a row".
1343 // // - bkacc=true: an optional array of integers.
1344 // // - bkacc=false: omitted.
1345 // "acc": [5, -3, 4, 2],
1347 // // Frames. Each element is an index into the "ftbl" array below.
1348 // // - All modes: A mandatory array of integers.
1353 // // Frame table. A mandatory array of strings.
1356 // "0x4AA1D9F: _nl_normalize_codeset (l10nflist.c:332)",
1357 // "0x4A9B414: _nl_load_locale_from_archive (loadarchive.c:173)",
1358 // "0x4A9A2BE: _nl_find_locale (findlocale.c:153)"
// Print to the output file `fp` (a file-scope handle opened in dh_fini).
// The GCC statement-expression wrapper `({ ... })` keeps the expansion a
// single expression, so `FP(...)` composes safely in any statement context.
#define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
1366 // The frame table holds unique frames.
1367 static WordFM
* frame_tbl
= NULL
;
1368 static UWord next_frame_n
= 0;
1370 static Word
frame_cmp(UWord a
, UWord b
)
1372 return VG_(strcmp
)((const HChar
*)a
, (const HChar
*)b
);
1375 static HChar
hex_digit_to_ascii_char(UChar d
)
1378 return (d
< 10) ? ('0' + d
) : ('a' + (d
- 10));
1381 // For JSON, we must escape double quote, backslash, and 0x00..0x1f.
1383 // Returns the original string if no escaping was required. Returns a pointer
1384 // to a static buffer if escaping was required. Therefore, the return value is
1385 // only valid until the next call to this function.
1386 static const HChar
* json_escape(const HChar
* s
)
1388 static HChar
* buf
= NULL
;
1389 static SizeT bufcap
= 0;
1391 // Do we need any escaping?
1396 if (c
== '"' || c
== '\\') {
1398 } else if (c
<= 0x1f) {
1406 // No escaping needed.
1410 // Escaping needed. (The +1 is for the NUL terminator.) Enlarge buf if
1412 SizeT newcap
= len
+ extra
+ 1;
1413 if (bufcap
< newcap
) {
1414 buf
= VG_(realloc
)("dh.json", buf
, newcap
);
1425 } else if (c
== '\\') {
1428 } else if (c
<= 0x1f) {
1433 *q
++ = hex_digit_to_ascii_char((c
& 0x00f0) >> 4);
1434 *q
++ = hex_digit_to_ascii_char(c
& 0x000f);
1445 static void write_PPInfo_frame(UInt n
, DiEpoch ep
, Addr ip
, void* opaque
)
1447 Bool
* is_first
= (Bool
*)opaque
;
1448 InlIPCursor
* iipc
= VG_(new_IIPC
)(ep
, ip
);
1451 const HChar
* buf
= VG_(describe_IP
)(ep
, ip
, iipc
);
1453 // Skip entries in vg_replace_malloc.c (e.g. `malloc`, `calloc`,
1454 // `realloc`, `operator new`) because they're boring and clog up the
1456 if (VG_(strstr
)(buf
, "vg_replace_malloc.c")) {
1460 // If this description has been seen before, get its number. Otherwise,
1461 // give it a new number and put it in the table.
1462 UWord keyW
= 0, valW
= 0;
1464 Bool found
= VG_(lookupFM
)(frame_tbl
, &keyW
, &valW
, (UWord
)buf
);
1466 //const HChar* str = (const HChar*)keyW;
1467 //tl_assert(0 == VG_(strcmp)(buf, str));
1470 // `buf` is a static buffer, we must copy it.
1471 const HChar
* str
= VG_(strdup
)("dh.frame_tbl.3", buf
);
1472 frame_n
= next_frame_n
++;
1473 Bool present
= VG_(addToFM
)(frame_tbl
, (UWord
)str
, frame_n
);
1474 tl_assert(!present
);
1477 FP("%c%lu", *is_first
? '[' : ',', frame_n
);
1480 } while (VG_(next_IIPC
)(iipc
));
1482 VG_(delete_IIPC
)(iipc
);
1485 static void write_PPInfo(PPInfo
* ppi
, Bool is_first
)
1487 FP(" %c{\"tb\":%llu,\"tbk\":%llu\n",
1488 is_first
? '[' : ',',
1489 ppi
->total_bytes
, ppi
->total_blocks
);
1491 if (clo_mode
== Heap
) {
1492 tl_assert(ppi
->total_blocks
>= ppi
->max_blocks
);
1493 tl_assert(ppi
->total_bytes
>= ppi
->max_bytes
);
1495 FP(" ,\"tl\":%llu\n",
1496 ppi
->total_lifetimes_instrs
);
1497 FP(" ,\"mb\":%llu,\"mbk\":%llu\n",
1498 ppi
->max_bytes
, ppi
->max_blocks
);
1499 FP(" ,\"gb\":%llu,\"gbk\":%llu\n",
1500 ppi
->at_tgmax_bytes
, ppi
->at_tgmax_blocks
);
1501 FP(" ,\"eb\":%llu,\"ebk\":%llu\n",
1502 ppi
->curr_bytes
, ppi
->curr_blocks
);
1503 FP(" ,\"rb\":%llu,\"wb\":%llu\n",
1504 ppi
->reads_bytes
, ppi
->writes_bytes
);
1506 if (ppi
->histo
&& ppi
->xsize_tag
== Exactly
) {
1509 // Simple run-length encoding: when N entries in a row have the same
1510 // value M, we print "-N,M". If there is just one in a row, we just
1511 // print "M". This reduces file size significantly.
1514 for (UWord i
= 0; i
< ppi
->xsize
; i
++) {
1515 UShort h
= ppi
->histo
[i
];
1517 // Continue current run.
1520 // End of run; print it.
1523 } else if (reps
> 1) {
1524 FP("-%d,%u,", reps
, repval
);
1530 // Print the final run.
1533 } else if (reps
> 1) {
1534 FP("-%d,%u", reps
, repval
);
1540 tl_assert(ppi
->curr_bytes
== 0);
1541 tl_assert(ppi
->curr_blocks
== 0);
1542 tl_assert(ppi
->max_bytes
== 0);
1543 tl_assert(ppi
->max_blocks
== 0);
1544 tl_assert(ppi
->at_tgmax_bytes
== 0);
1545 tl_assert(ppi
->at_tgmax_blocks
== 0);
1546 tl_assert(ppi
->total_lifetimes_instrs
== 0);
1547 tl_assert(ppi
->freed_blocks
== 0);
1548 tl_assert(ppi
->reads_bytes
== 0);
1549 tl_assert(ppi
->writes_bytes
== 0);
1550 tl_assert(ppi
->xsize_tag
== 0);
1551 tl_assert(ppi
->xsize
== 0);
1552 tl_assert(ppi
->histo
== NULL
);
1556 Bool is_first_frame
= True
;
1557 VG_(apply_ExeContext
)(write_PPInfo_frame
, &is_first_frame
, ppi
->ec
);
1563 static void write_PPInfos(void)
1569 VG_(initIterFM
)(ppinfo
);
1570 Bool is_first
= True
;
1571 while (VG_(nextIterFM
)(ppinfo
, &keyW
, &valW
)) {
1572 PPInfo
* ppi
= (PPInfo
*)valW
;
1573 tl_assert(ppi
&& ppi
->ec
== (ExeContext
*)keyW
);
1574 write_PPInfo(ppi
, is_first
);
1577 VG_(doneIterFM
)(ppinfo
);
1580 // We didn't print any elements. This happens if ppinfo is empty.
1587 static void dh_fini(Int exit_status
)
1589 // This function does lots of allocations that it doesn't bother to free,
1590 // because execution is almost over anyway.
1594 // Total bytes might be at a possible peak.
1595 if (clo_mode
== Heap
) {
1598 // Before printing statistics, we must harvest various stats (such as
1599 // lifetimes and accesses) for all the blocks that are still alive.
1600 VG_(initIterFM
)( interval_tree
);
1601 while (VG_(nextIterFM
)( interval_tree
, &keyW
, &valW
)) {
1602 Block
* bk
= (Block
*)keyW
;
1603 tl_assert(valW
== 0);
1605 retire_Block(bk
, False
/*!because_freed*/);
1607 VG_(doneIterFM
)( interval_tree
);
1610 if (VG_(clo_stats
)) {
1611 VG_(dmsg
)(" dhat: find_Block_containing:\n");
1612 VG_(dmsg
)(" found: %'lu (%'lu cached + %'lu uncached)\n",
1613 stats__n_fBc_cached
+ stats__n_fBc_uncached
,
1614 stats__n_fBc_cached
,
1615 stats__n_fBc_uncached
);
1616 VG_(dmsg
)(" notfound: %'lu\n", stats__n_fBc_notfound
);
1621 // Create the frame table, and insert the special "[root]" node at index 0.
1622 frame_tbl
= VG_(newFM
)(VG_(malloc
),
1626 const HChar
* root
= VG_(strdup
)("dh.frame_tbl.2", "[root]");
1627 Bool present
= VG_(addToFM
)(frame_tbl
, (UWord
)root
, 0);
1628 tl_assert(!present
);
1631 // Setup output filename. Nb: it's important to do this now, i.e. as late
1632 // as possible. If we do it at start-up and the program forks and the
1633 // output file format string contains a %p (pid) specifier, both the parent
1634 // and child will incorrectly write to the same file; this happened in
1636 HChar
* dhat_out_file
=
1637 VG_(expand_file_name
)("--dhat-out-file", clo_dhat_out_file
);
1639 fp
= VG_(fopen
)(dhat_out_file
, VKI_O_CREAT
|VKI_O_TRUNC
|VKI_O_WRONLY
,
1640 VKI_S_IRUSR
|VKI_S_IWUSR
);
1642 VG_(umsg
)("error: can't open DHAT output file '%s'\n", dhat_out_file
);
1643 VG_(free
)(dhat_out_file
);
1647 // Write to data file.
1648 FP("{\"dhatFileVersion\":2\n");
1650 // The output mode, block booleans, and byte/block units.
1651 if (clo_mode
== Heap
) {
1652 FP(",\"mode\":\"heap\",\"verb\":\"Allocated\"\n");
1653 FP(",\"bklt\":true,\"bkacc\":true\n");
1654 } else if (clo_mode
== Copy
) {
1655 FP(",\"mode\":\"copy\",\"verb\":\"Copied\"\n");
1656 FP(",\"bklt\":false,\"bkacc\":false\n");
1657 } else if (clo_mode
== AdHoc
) {
1658 FP(",\"mode\":\"ad-hoc\",\"verb\":\"Occurred\"\n");
1659 FP(",\"bklt\":false,\"bkacc\":false\n");
1660 FP(",\"bu\":\"unit\",\"bsu\":\"units\",\"bksu\":\"events\"\n");
1666 FP(",\"tu\":\"instrs\",\"Mtu\":\"Minstr\"\n");
1667 if (clo_mode
== Heap
) {
1668 FP(",\"tuth\":500\n");
1672 const HChar
* exe
= VG_(args_the_exename
);
1673 FP(",\"cmd\":\"%s", json_escape(exe
));
1674 for (Word i
= 0; i
< VG_(sizeXA
)(VG_(args_for_client
)); i
++) {
1675 const HChar
* arg
= *(HChar
**)VG_(indexXA
)(VG_(args_for_client
), i
);
1676 FP(" %s", json_escape(arg
));
1681 FP(",\"pid\":%d\n", VG_(getpid
)());
1684 FP(",\"te\":%llu\n", g_curr_instrs
);
1685 if (clo_mode
== Heap
) {
1686 FP(",\"tg\":%llu\n", g_tgmax_instrs
);
1688 tl_assert(g_tgmax_instrs
== 0);
1697 // The frame table maps strings to numbers. We want to print it ordered by
1698 // numbers. So we create an array and fill it in from the frame table, then
1700 UWord n_frames
= next_frame_n
;
1701 const HChar
** frames
=
1702 VG_(malloc
)("dh.frames", n_frames
* sizeof(const HChar
*));
1703 VG_(initIterFM
)(frame_tbl
);
1704 while (VG_(nextIterFM
)(frame_tbl
, &keyW
, &valW
)) {
1705 const HChar
* str
= (const HChar
*)keyW
;
1709 VG_(doneIterFM
)(frame_tbl
);
1711 for (UWord i
= 0; i
< n_frames
; i
++) {
1712 FP(" %c\"%s\"\n", i
== 0 ? '[' : ',', json_escape(frames
[i
]));
1722 if (VG_(clo_verbosity
) == 0) {
1726 // Print brief global stats.
1727 VG_(umsg
)("Total: %'llu %s in %'llu %s\n",
1728 g_total_bytes
, clo_mode
== AdHoc
? "units" : "bytes",
1729 g_total_blocks
, clo_mode
== AdHoc
? "events" : "blocks");
1730 if (clo_mode
== Heap
) {
1731 VG_(umsg
)("At t-gmax: %'llu bytes in %'llu blocks\n",
1732 g_max_bytes
, g_max_blocks
);
1733 VG_(umsg
)("At t-end: %'llu bytes in %'llu blocks\n",
1734 g_curr_bytes
, g_curr_blocks
);
1735 VG_(umsg
)("Reads: %'llu bytes\n", g_reads_bytes
);
1736 VG_(umsg
)("Writes: %'llu bytes\n", g_writes_bytes
);
1738 tl_assert(g_max_bytes
== 0);
1739 tl_assert(g_max_blocks
== 0);
1740 tl_assert(g_curr_bytes
== 0);
1741 tl_assert(g_curr_blocks
== 0);
1742 tl_assert(g_reads_bytes
== 0);
1743 tl_assert(g_writes_bytes
== 0);
1746 // Print a how-to-view-the-profile hint.
1748 VG_(umsg
)("To view the resulting profile, open\n");
1749 VG_(umsg
)(" file://%s/%s\n", DHAT_VIEW_DIR
, "dh_view.html");
1750 VG_(umsg
)("in a web browser, click on \"Load...\", "
1751 "and then select the file\n");
1752 VG_(umsg
)(" %s\n", dhat_out_file
);
1753 VG_(umsg
)("The text at the bottom explains the abbreviations used in the "
1756 VG_(free
)(dhat_out_file
);
1759 //------------------------------------------------------------//
1760 //--- Initialisation ---//
1761 //------------------------------------------------------------//
// Post-command-line-option initialisation. Block-access counting is only
// meaningful in Heap mode, so the memory-access callbacks are registered
// only then; in Copy/AdHoc mode no read/write tracking is installed.
static void dh_post_clo_init(void)
{
   if (clo_mode == Heap) {
      // Non-instruction reads/writes (e.g. from syscalls and string
      // client requests) still touch client heap blocks, so hook them too.
      VG_(track_pre_mem_read)        ( dh_handle_noninsn_read );
      VG_(track_pre_mem_read_asciiz) ( dh_handle_noninsn_read_asciiz );
      VG_(track_post_mem_write)      ( dh_handle_noninsn_write );
   }
}
1772 static void dh_pre_clo_init(void)
1774 VG_(details_name
) ("DHAT");
1775 VG_(details_version
) (NULL
);
1776 VG_(details_description
) ("a dynamic heap analysis tool");
1777 VG_(details_copyright_author
)(
1778 "Copyright (C) 2010-2018, and GNU GPL'd, by Mozilla Foundation");
1779 VG_(details_bug_reports_to
) (VG_BUGS_TO
);
1782 VG_(basic_tool_funcs
) (dh_post_clo_init
,
1787 VG_(needs_libc_freeres
)();
1788 VG_(needs_cxx_freeres
)();
1789 VG_(needs_command_line_options
)(dh_process_cmd_line_option
,
1791 dh_print_debug_usage
);
1792 VG_(needs_client_requests
) (dh_handle_client_request
);
1793 // VG_(needs_sanity_checks) (dh_cheap_sanity_check,
1794 // dh_expensive_sanity_check);
1795 VG_(needs_malloc_replacement
)(dh_malloc
,
1797 dh___builtin_new_aligned
,
1798 dh___builtin_vec_new
,
1799 dh___builtin_vec_new_aligned
,
1803 dh___builtin_delete
,
1804 dh___builtin_delete_aligned
,
1805 dh___builtin_vec_delete
,
1806 dh___builtin_vec_delete_aligned
,
1808 dh_malloc_usable_size
,
1811 tl_assert(!interval_tree
);
1812 tl_assert(!fbc_cache0
);
1813 tl_assert(!fbc_cache1
);
1815 interval_tree
= VG_(newFM
)( VG_(malloc
),
1816 "dh.interval_tree.1",
1818 interval_tree_Cmp
);
1820 ppinfo
= VG_(newFM
)( VG_(malloc
),
1823 NULL
/*unboxedcmp*/ );
1826 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init
)
1828 //--------------------------------------------------------------------//
1829 //--- end dh_main.c ---//
1830 //--------------------------------------------------------------------//