Bug 414268 - Enable AArch64 feature detection and decoding for v8.x instructions...
[valgrind.git] / dhat / dh_main.c
blob90b1a965ac70841c33873dfd5c055abdfec37c7e
2 //--------------------------------------------------------------------//
3 //--- DHAT: a Dynamic Heap Analysis Tool dh_main.c ---//
4 //--------------------------------------------------------------------//
6 /*
7 This file is part of DHAT, a Valgrind tool for profiling the
8 heap usage of programs.
10 Copyright (C) 2010-2018 Mozilla Foundation
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 /* Contributed by Julian Seward <jseward@acm.org> */
30 #include "pub_tool_basics.h"
31 #include "pub_tool_clientstate.h"
32 #include "pub_tool_clreq.h"
33 #include "pub_tool_libcbase.h"
34 #include "pub_tool_libcassert.h"
35 #include "pub_tool_libcfile.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_libcproc.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_options.h"
41 #include "pub_tool_replacemalloc.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
45 #include "dhat.h"
// Blocks whose requested size exceeds this limit get no per-byte access
// histogram (one UShort counter per payload byte would cost too much).
#define HISTOGRAM_SIZE_LIMIT 1024

//------------------------------------------------------------//
//--- Globals                                              ---//
//------------------------------------------------------------//

// Values for the entire run.
static ULong g_total_blocks = 0;
static ULong g_total_bytes  = 0;

// Current values. g_curr_blocks and g_curr_bytes are only used with
// clo_mode=Heap.
static ULong g_curr_blocks = 0;
static ULong g_curr_bytes  = 0;
static ULong g_curr_instrs = 0;  // incremented from generated code

// Values at the global max, i.e. when g_curr_bytes peaks.
// Only used with clo_mode=Heap.
static ULong g_max_blocks = 0;
static ULong g_max_bytes  = 0;

// Time of the global max.
static ULong g_tgmax_instrs = 0;

// Values for the entire run. Updated each time a block is retired.
// Only used with clo_mode=Heap.
static ULong g_reads_bytes  = 0;
static ULong g_writes_bytes = 0;

//------------------------------------------------------------//
//--- Command line args                                    ---//
//------------------------------------------------------------//

// Profiling mode, selected by --mode=.  (Heap's nonzero start value is
// arbitrary.)
typedef enum { Heap=55, Copy, AdHoc } ProfileKind;

static ProfileKind clo_mode = Heap;

// Output file name; "%p" is expanded to the process id.
static const HChar* clo_dhat_out_file = "dhat.out.%p";
// Process one DHAT-specific command line option.  Returns True if the
// option was recognised (here or by the replacement-malloc machinery),
// False otherwise.
static Bool dh_process_cmd_line_option(const HChar* arg)
{
   if VG_STR_CLO(arg, "--dhat-out-file", clo_dhat_out_file) {

   } else if (VG_XACT_CLO(arg, "--mode=heap",   clo_mode, Heap)) {
   } else if (VG_XACT_CLO(arg, "--mode=copy",   clo_mode, Copy)) {
   } else if (VG_XACT_CLO(arg, "--mode=ad-hoc", clo_mode, AdHoc)) {

   } else {
      // Not one of ours; maybe a generic malloc-replacement option.
      return VG_(replacement_malloc_process_cmd_line_option)(arg);
   }

   return True;
}
// Print the user-visible option summary for --help.
static void dh_print_usage(void)
{
   VG_(printf)(
"    --dhat-out-file=<file>    output file name [dhat.out.%%p]\n"
"    --mode=heap|copy|ad-hoc   profiling mode\n"
   );
}

// Print the debug-option summary for --help-debug (DHAT has none).
static void dh_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
116 //------------------------------------------------------------//
117 //--- an Interval Tree of live blocks ---//
118 //------------------------------------------------------------//
/* Tracks information about live blocks. */
typedef
   struct {
      Addr        payload;       // start address of the client block
      SizeT       req_szB;       // requested size; never zero (see tree invariant)
      ExeContext* ec;            /* allocation ec */
      ULong       allocd_at;     /* instruction number */
      ULong       reads_bytes;   // bytes read from this block so far
      ULong       writes_bytes;  // bytes written to this block so far
      /* Approx histogram, one byte per payload byte.  Counts latch up
         therefore at 0xFFFF.  Can be NULL if the block is resized or if
         the block is larger than HISTOGRAM_SIZE_LIMIT. */
      UShort*     histoW;        /* [0 .. req_szB-1] */
   }
   Block;

/* May not contain zero-sized blocks.  May not contain
   overlapping blocks. */
static WordFM* interval_tree = NULL;  /* WordFM* Block* void */
140 /* Here's the comparison function. Since the tree is required
141 to contain non-zero sized, non-overlapping blocks, it's good
142 enough to consider any overlap as a match. */
143 static Word interval_tree_Cmp ( UWord k1, UWord k2 )
145 Block* b1 = (Block*)k1;
146 Block* b2 = (Block*)k2;
147 tl_assert(b1->req_szB > 0);
148 tl_assert(b2->req_szB > 0);
149 if (b1->payload + b1->req_szB <= b2->payload) return -1;
150 if (b2->payload + b2->req_szB <= b1->payload) return 1;
151 return 0;
// 2-entry MRU cache for find_Block_containing.
static Block* fbc_cache0 = NULL;   // most recently used
static Block* fbc_cache1 = NULL;   // second most recently used

// Cache statistics for the block finder.
static UWord stats__n_fBc_cached   = 0;  // hit in the 2-entry cache
static UWord stats__n_fBc_uncached = 0;  // found, but only via tree lookup
static UWord stats__n_fBc_notfound = 0;  // address not in any live block
/* Return the live Block containing address 'a', or NULL if none does.
   Checks the 2-entry MRU cache first, then falls back to an interval
   tree lookup.  Heap mode only. */
static Block* find_Block_containing ( Addr a )
{
   tl_assert(clo_mode == Heap);

   if (LIKELY(fbc_cache0
              && fbc_cache0->payload <= a
              && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
      // found at 0
      stats__n_fBc_cached++;
      return fbc_cache0;
   }
   if (LIKELY(fbc_cache1
              && fbc_cache1->payload <= a
              && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
      // found at 1; swap 0 and 1
      Block* tmp = fbc_cache0;
      fbc_cache0 = fbc_cache1;
      fbc_cache1 = tmp;
      stats__n_fBc_cached++;
      return fbc_cache0;
   }
   // Cache miss.  Probe the tree with a fake 1-byte block at 'a': the
   // overlap-is-equality comparator makes this match any block that
   // contains 'a'.
   Block fake;
   fake.payload = a;
   fake.req_szB = 1;
   UWord foundkey = 1;
   UWord foundval = 1;
   Bool found = VG_(lookupFM)( interval_tree,
                               &foundkey, &foundval, (UWord)&fake );
   if (!found) {
      stats__n_fBc_notfound++;
      return NULL;
   }
   tl_assert(foundval == 0); // we don't store vals in the interval tree
   tl_assert(foundkey != 1);
   Block* res = (Block*)foundkey;
   tl_assert(res != &fake);
   // put at the top position
   fbc_cache1 = fbc_cache0;
   fbc_cache0 = res;
   stats__n_fBc_uncached++;
   return res;
}
// delete a block; asserts if not found.  (viz, 'a' must be
// known to be present.)
static void delete_Block_starting_at ( Addr a )
{
   tl_assert(clo_mode == Heap);

   // A fake 1-byte block matches (via the overlap comparator) the real
   // block starting at 'a'.
   Block fake;
   fake.payload = a;
   fake.req_szB = 1;
   Bool found = VG_(delFromFM)( interval_tree,
                                NULL, NULL, (Addr)&fake );
   tl_assert(found);
   // The deleted block may be in the finder cache; drop both entries.
   fbc_cache0 = fbc_cache1 = NULL;
}
220 //------------------------------------------------------------//
221 //--- a FM of allocation points (APs) ---//
222 //------------------------------------------------------------//
/* Aggregated allocation statistics for one program point (PP). */
typedef
   struct {
      // The program point that we're summarising stats for.
      ExeContext* ec;

      // Total number of blocks and bytes allocated by this PP.
      ULong total_blocks;
      ULong total_bytes;

      // The current number of blocks and bytes live for this PP.
      // Only used with clo_mode=Heap.
      ULong curr_blocks;
      ULong curr_bytes;

      // Values at the PP max, i.e. when this PP's curr_bytes peaks.
      // Only used with clo_mode=Heap.
      ULong max_blocks;     // Blocks at the PP max.
      ULong max_bytes;      // The PP max, measured in bytes.

      // Values at the global max.
      // Only used with clo_mode=Heap.
      ULong at_tgmax_blocks;
      ULong at_tgmax_bytes;

      // Total lifetimes of all blocks allocated by this PP.  Includes blocks
      // explicitly freed and blocks implicitly freed at termination.
      // Only used with clo_mode=Heap.
      ULong total_lifetimes_instrs;

      // Number of blocks freed by this PP.  (Only used in assertions.)
      // Only used with clo_mode=Heap.
      ULong freed_blocks;

      // Total number of reads and writes in all blocks allocated
      // by this PP.  Only used with clo_mode=Heap.
      ULong reads_bytes;
      ULong writes_bytes;

      /* Histogram information.  We maintain a histogram aggregated for
         all retiring Blocks allocated by this PP, but only if:
         - this PP has only ever allocated objects of one size
         - that size is <= HISTOGRAM_SIZE_LIMIT
         What we need therefore is a mechanism to see if this PP
         has only ever allocated blocks of one size.

         3 states:
            Unknown          because no retirement yet
            Exactly xsize    all retiring blocks are of this size
            Mixed            multiple different sizes seen

         Only used with clo_mode=Heap.
      */
      enum { Unknown=999, Exactly, Mixed } xsize_tag;
      SizeT xsize;
      UInt* histo; /* [0 .. xsize-1] */
   }
   PPInfo;

/* maps ExeContext*'s to PPInfo*'s.  Note that the keys must match the
   .ec field in the values. */
static WordFM* ppinfo = NULL;  /* WordFM* ExeContext* PPInfo* */
// Are we at peak memory? If so, update at_tgmax_blocks and at_tgmax_bytes in
// all PPInfos. Note that this is moderately expensive so we avoid calling it
// on every allocation.
static void check_for_peak(void)
{
   tl_assert(clo_mode == Heap);

   if (g_curr_bytes == g_max_bytes) {
      // It's a peak. (If there are multiple equal peaks we record the latest
      // one.)
      UWord keyW, valW;
      VG_(initIterFM)(ppinfo);
      while (VG_(nextIterFM)(ppinfo, &keyW, &valW)) {
         PPInfo* ppi = (PPInfo*)valW;
         tl_assert(ppi && ppi->ec == (ExeContext*)keyW);
         // Snapshot this PP's current values as its at-global-max values.
         ppi->at_tgmax_blocks = ppi->curr_blocks;
         ppi->at_tgmax_bytes  = ppi->curr_bytes;
      }
      VG_(doneIterFM)(ppinfo);
   }
}
/* 'bk' is being introduced (has just been allocated).  Find the
   relevant PPInfo entry for it, or create one, based on the block's
   allocation EC.  Then, update the PPInfo to the extent that we
   actually can, to reflect the allocation. */
static void intro_Block(Block* bk)
{
   tl_assert(bk);
   tl_assert(bk->ec);

   PPInfo* ppi   = NULL;
   UWord   keyW  = 0;
   UWord   valW  = 0;
   Bool    found = VG_(lookupFM)( ppinfo,
                                  &keyW, &valW, (UWord)bk->ec );
   if (found) {
      ppi = (PPInfo*)valW;
      tl_assert(keyW == (UWord)bk->ec);
   } else {
      // First allocation seen from this program point: make its PPInfo.
      ppi = VG_(malloc)( "dh.intro_Block.1", sizeof(PPInfo) );
      VG_(memset)(ppi, 0, sizeof(*ppi));
      ppi->ec = bk->ec;
      Bool present = VG_(addToFM)( ppinfo,
                                   (UWord)bk->ec, (UWord)ppi );
      tl_assert(!present);
      if (clo_mode == Heap) {
         // histo stuff
         tl_assert(ppi->freed_blocks == 0);
         ppi->xsize_tag = Unknown;
         ppi->xsize = 0;
         if (0) VG_(printf)("ppi %p --> Unknown\n", ppi);
      }
   }

   tl_assert(ppi->ec == bk->ec);

   // Update global stats and PPInfo stats.

   g_total_blocks++;
   g_total_bytes += bk->req_szB;

   ppi->total_blocks++;
   ppi->total_bytes += bk->req_szB;

   if (clo_mode == Heap) {
      g_curr_blocks++;
      g_curr_bytes += bk->req_szB;

      ppi->curr_blocks++;
      ppi->curr_bytes += bk->req_szB;

      // The use of `>=` rather than `>` means that if there are multiple equal
      // peaks we record the latest one, like `check_for_peak` does.
      if (g_curr_bytes >= g_max_bytes) {
         g_max_blocks   = g_curr_blocks;
         g_max_bytes    = g_curr_bytes;
         g_tgmax_instrs = g_curr_instrs;

         ppi->max_blocks = ppi->curr_blocks;
         ppi->max_bytes  = ppi->curr_bytes;
      }
   }
}
/* 'bk' is retiring (being freed).  Find the relevant PPInfo entry for
   it, which must already exist.  Then, fold info from 'bk' into that
   entry.  'because_freed' is True if the block is retiring because
   the client has freed it.  If it is False then the block is retiring
   because the program has finished, in which case we want to skip the
   updates of the total blocks live etc for this PP, but still fold in
   the access counts and histo data that have so far accumulated for
   the block. */
static void retire_Block(Block* bk, Bool because_freed)
{
   tl_assert(clo_mode == Heap);
   tl_assert(bk);
   tl_assert(bk->ec);

   PPInfo* ppi   = NULL;
   UWord   keyW  = 0;
   UWord   valW  = 0;
   Bool    found = VG_(lookupFM)( ppinfo,
                                  &keyW, &valW, (UWord)bk->ec );
   tl_assert(found);
   ppi = (PPInfo*)valW;
   tl_assert(ppi->ec == bk->ec);

   // update stats following this free.
   if (0)
      VG_(printf)("ec %p  ppi->c_by_l %llu  bk->rszB %llu\n",
                  bk->ec, ppi->curr_bytes, (ULong)bk->req_szB);

   if (because_freed) {
      // Total bytes is coming down from a possible peak.
      check_for_peak();

      // Then update global stats.
      tl_assert(g_curr_blocks >= 1);
      tl_assert(g_curr_bytes >= bk->req_szB);
      g_curr_blocks--;
      g_curr_bytes -= bk->req_szB;

      // Then update PPInfo stats.
      tl_assert(ppi->curr_blocks >= 1);
      tl_assert(ppi->curr_bytes >= bk->req_szB);
      ppi->curr_blocks--;
      ppi->curr_bytes -= bk->req_szB;

      ppi->freed_blocks++;
   }

   // Lifetime is folded in whether the free was explicit or implicit.
   tl_assert(bk->allocd_at <= g_curr_instrs);
   ppi->total_lifetimes_instrs += (g_curr_instrs - bk->allocd_at);

   // access counts
   ppi->reads_bytes  += bk->reads_bytes;
   ppi->writes_bytes += bk->writes_bytes;
   g_reads_bytes  += bk->reads_bytes;
   g_writes_bytes += bk->writes_bytes;

   // histo stuff.  First, do state transitions for xsize/xsize_tag.
   switch (ppi->xsize_tag) {

      case Unknown:
         // First retirement from this PP: latch its block size.
         tl_assert(ppi->xsize == 0);
         tl_assert(ppi->freed_blocks == 1 || ppi->freed_blocks == 0);
         tl_assert(!ppi->histo);
         ppi->xsize_tag = Exactly;
         ppi->xsize = bk->req_szB;
         if (0) VG_(printf)("ppi %p --> Exactly(%lu)\n", ppi, ppi->xsize);
         // and allocate the histo
         if (bk->histoW) {
            ppi->histo = VG_(malloc)("dh.retire_Block.1",
                                     ppi->xsize * sizeof(UInt));
            VG_(memset)(ppi->histo, 0, ppi->xsize * sizeof(UInt));
         }
         break;

      case Exactly:
         //tl_assert(ppi->freed_blocks > 1);
         if (bk->req_szB != ppi->xsize) {
            // A different size retired: give up on histogramming this PP.
            if (0) VG_(printf)("ppi %p --> Mixed(%lu -> %lu)\n",
                               ppi, ppi->xsize, bk->req_szB);
            ppi->xsize_tag = Mixed;
            ppi->xsize = 0;
            // deallocate the histo, if any
            if (ppi->histo) {
               VG_(free)(ppi->histo);
               ppi->histo = NULL;
            }
         }
         break;

      case Mixed:
         //tl_assert(ppi->freed_blocks > 1);
         break;

      default:
         tl_assert(0);
   }

   // See if we can fold the histo data from this block into
   // the data for the PP.
   if (ppi->xsize_tag == Exactly && ppi->histo && bk->histoW) {
      tl_assert(ppi->xsize == bk->req_szB);
      UWord i;
      for (i = 0; i < ppi->xsize; i++) {
         // FIXME: do something better in case of overflow of ppi->histo[..]
         // Right now, at least don't let it overflow/wrap around
         if (ppi->histo[i] <= 0xFFFE0000)
            ppi->histo[i] += (UInt)bk->histoW[i];
      }
      if (0) VG_(printf)("fold in, PP = %p\n", ppi);
   }

#if 0
   if (bk->histoB) {
      VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
      UWord i;
      for (i = 0; i < bk->req_szB; i++)
         VG_(printf)("%u ", (UInt)bk->histoB[i]);
      VG_(printf)("\n");
   } else {
      VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
   }
#endif
}
/* This handles block resizing.  When a block with PP 'ec' has a
   size change of 'delta', call here to update the PPInfo. */
static void resize_Block(ExeContext* ec, SizeT old_req_szB, SizeT new_req_szB)
{
   tl_assert(clo_mode == Heap);

   Long    delta = (Long)new_req_szB - (Long)old_req_szB;
   PPInfo* ppi   = NULL;
   UWord   keyW  = 0;
   UWord   valW  = 0;
   Bool    found = VG_(lookupFM)( ppinfo,
                                  &keyW, &valW, (UWord)ec );

   tl_assert(found);
   ppi = (PPInfo*)valW;
   tl_assert(ppi->ec == ec);

   if (delta < 0) {
      tl_assert(ppi->curr_bytes >= -delta);
      tl_assert(g_curr_bytes >= -delta);

      // Total bytes might be coming down from a possible peak.
      check_for_peak();
   }

   // Note: we treat realloc() like malloc() + free() for total counts, i.e. we
   // increment total_blocks by 1 and increment total_bytes by new_req_szB.
   //
   // A reasonable alternative would be to leave total_blocks unchanged and
   // increment total_bytes by delta (but only if delta is positive).  But then
   // calls to realloc wouldn't be counted towards the total_blocks count,
   // which is undesirable.

   // Update global stats and PPInfo stats.

   g_total_blocks++;
   g_total_bytes += new_req_szB;

   ppi->total_blocks++;
   ppi->total_bytes += new_req_szB;

   g_curr_blocks += 0;  // unchanged
   g_curr_bytes  += delta;

   ppi->curr_blocks += 0;  // unchanged
   ppi->curr_bytes  += delta;

   // The use of `>=` rather than `>` means that if there are multiple equal
   // peaks we record the latest one, like `check_for_peak` does.
   if (g_curr_bytes >= g_max_bytes) {
      g_max_blocks   = g_curr_blocks;
      g_max_bytes    = g_curr_bytes;
      g_tgmax_instrs = g_curr_instrs;

      ppi->max_blocks = ppi->curr_blocks;
      ppi->max_bytes  = ppi->curr_bytes;
   }
}
554 //------------------------------------------------------------//
555 //--- update both Block and PPInfos after {m,re}alloc/free ---//
556 //------------------------------------------------------------//
/* Common allocation path for all malloc-family wrappers.  Allocates
   the client block, and in heap mode also creates the tracking Block,
   inserts it into the interval tree and updates the PPInfo stats.
   Returns the client pointer, or NULL on failure. */
static
void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
                  Bool is_zeroed )
{
   tl_assert(p == NULL); // don't handle custom allocators right now
   SizeT actual_szB;

   if ((SSizeT)req_szB < 0) return NULL;

   if (req_szB == 0) {
      req_szB = 1;  /* can't allow zero-sized blocks in the interval tree */
   }

   // Allocate and zero if necessary
   if (!p) {
      p = VG_(cli_malloc)( req_alignB, req_szB );
      if (!p) {
         return NULL;
      }
      if (is_zeroed) VG_(memset)(p, 0, req_szB);
      actual_szB = VG_(cli_malloc_usable_size)(p);
      tl_assert(actual_szB >= req_szB);
   }

   if (clo_mode != Heap) {
      // Copy/ad-hoc modes don't track individual heap blocks.
      return p;
   }

   // Make new Block, add to interval_tree.
   Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
   bk->payload      = (Addr)p;
   bk->req_szB      = req_szB;
   bk->ec           = VG_(record_ExeContext)(tid, 0/*first word delta*/);
   bk->allocd_at    = g_curr_instrs;
   bk->reads_bytes  = 0;
   bk->writes_bytes = 0;
   // Set up histogram array, if the block isn't too large.
   bk->histoW = NULL;
   if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
      bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
      VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
   }

   Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
   tl_assert(!present);
   // Tree changed, so the finder cache may be stale.
   fbc_cache0 = fbc_cache1 = NULL;

   intro_Block(bk);

   return p;
}
/* Common deallocation path for all free-family wrappers.  Frees the
   client block, and in heap mode retires and destroys the tracking
   Block.  Bogus frees (pointer not at the start of a live block) are
   ignored after the client-side free. */
static
void die_block ( void* p )
{
   VG_(cli_free)(p);

   if (clo_mode != Heap) {
      return;
   }

   Block* bk = find_Block_containing( (Addr)p );
   if (!bk) {
      return;  // bogus free
   }

   tl_assert(bk->req_szB > 0);
   // assert the block finder is behaving sanely
   tl_assert(bk->payload <= (Addr)p);
   tl_assert( (Addr)p < bk->payload + bk->req_szB );

   if (bk->payload != (Addr)p) {
      return;  // bogus free
   }

   retire_Block(bk, True/*because_freed*/);

   delete_Block_starting_at( bk->payload );
   if (bk->histoW) {
      VG_(free)( bk->histoW );
      bk->histoW = NULL;
   }
   VG_(free)( bk );
}
/* Common resize path for realloc.  In non-heap modes this is a plain
   alloc-copy-free.  In heap mode the tracking Block is updated, and
   moved within the interval tree if the payload moves.  Returns the
   new client pointer, or NULL on failure / bogus realloc. */
static
void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
{
   void* p_new = NULL;

   tl_assert(new_req_szB > 0); // map 0 to 1

   if (clo_mode != Heap) {
      // Not tracking blocks: just reallocate directly.
      SizeT old_actual_szB = VG_(cli_malloc_usable_size)(p_old);
      p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
      if (!p_new) {
         return NULL;
      }
      VG_(memmove)(p_new, p_old, VG_MIN(old_actual_szB, new_req_szB));
      VG_(cli_free)(p_old);
      return p_new;
   }

   // Find the old block.
   Block* bk = find_Block_containing( (Addr)p_old );
   if (!bk) {
      return NULL;   // bogus realloc
   }

   tl_assert(bk->req_szB > 0);
   // Assert the block finder is behaving sanely.
   tl_assert(bk->payload <= (Addr)p_old);
   tl_assert( (Addr)p_old < bk->payload + bk->req_szB );

   if (bk->payload != (Addr)p_old) {
      return NULL;   // bogus realloc
   }

   // Keeping the histogram alive in any meaningful way across
   // block resizing is too darn complicated.  Just throw it away.
   if (bk->histoW) {
      VG_(free)(bk->histoW);
      bk->histoW = NULL;
   }

   // Actually do the allocation, if necessary.
   if (new_req_szB <= bk->req_szB) {

      // New size is smaller or same; block not moved.
      resize_Block(bk->ec, bk->req_szB, new_req_szB);
      bk->req_szB = new_req_szB;

      // Update reads/writes for the implicit copy.  Even though we didn't
      // actually do a copy, we act like we did, to match up with the fact
      // that we treat this as an additional allocation.
      bk->reads_bytes  += new_req_szB;
      bk->writes_bytes += new_req_szB;

      p_new = p_old;

   } else {

      // New size is bigger;  make new block, copy shared contents, free old.
      p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
      if (!p_new) {
         // Nb: if realloc fails, NULL is returned but the old block is not
         // touched.  What an awful function.
         return NULL;
      }
      tl_assert(p_new != p_old);

      VG_(memcpy)(p_new, p_old, bk->req_szB);
      VG_(cli_free)(p_old);

      // Since the block has moved, we need to re-insert it into the
      // interval tree at the new place.  Do this by removing
      // and re-adding it.
      delete_Block_starting_at( (Addr)p_old );
      // Now 'bk' is no longer in the tree, but the Block itself
      // is still alive.

      // Update reads/writes for the copy.
      bk->reads_bytes  += bk->req_szB;
      bk->writes_bytes += bk->req_szB;

      // Update the metadata.
      resize_Block(bk->ec, bk->req_szB, new_req_szB);
      bk->payload = (Addr)p_new;
      bk->req_szB = new_req_szB;

      // And re-add it to the interval tree.
      Bool present
         = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
      tl_assert(!present);
      fbc_cache0 = fbc_cache1 = NULL;
   }

   return p_new;
}
736 //------------------------------------------------------------//
737 //--- malloc() et al replacement wrappers ---//
738 //------------------------------------------------------------//
// Allocation wrappers: all funnel into new_block().

static void* dh_malloc ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* dh___builtin_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
755 static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
757 return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
// memalign wrapper: like malloc but with a caller-specified alignment.
static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
{
   return new_block( tid, NULL, szB, alignB, False );
}

// Deallocation wrappers: all funnel into die_block().

static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
{
   die_block(p);
}

static void dh___builtin_delete ( ThreadId tid, void* p )
{
   die_block(p);
}

static void dh___builtin_vec_delete ( ThreadId tid, void* p )
{
   die_block(p);
}
// realloc wrapper: NULL pointer acts as malloc, zero size acts as
// free, anything else goes to renew_block().
static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
{
   if (p_old == NULL) {
      return dh_malloc(tid, new_szB);
   }
   if (new_szB == 0) {
      dh_free(tid, p_old);
      return NULL;
   }
   return renew_block(tid, p_old, new_szB);
}
// Report the block size.  In heap mode this is the *requested* size of
// the tracked block (0 if 'p' isn't a live block); otherwise fall back
// to the allocator's real usable size.
static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
{
   if (clo_mode != Heap) {
      return VG_(cli_malloc_usable_size)(p);
   }

   Block* bk = find_Block_containing( (Addr)p );
   return bk ? bk->req_szB : 0;
}
802 //------------------------------------------------------------//
803 //--- memory references ---//
804 //------------------------------------------------------------//
806 static
807 void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
809 UWord i, offMin, offMax1;
810 offMin = addr - bk->payload;
811 tl_assert(offMin < bk->req_szB);
812 offMax1 = offMin + szB;
813 if (offMax1 > bk->req_szB)
814 offMax1 = bk->req_szB;
815 //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB);
816 for (i = offMin; i < offMax1; i++) {
817 UShort n = bk->histoW[i];
818 if (n < 0xFFFF) n++;
819 bk->histoW[i] = n;
// Helper called from instrumented code for a data write of 'szB' bytes
// at 'addr'.  Accesses outside any live block are ignored.
static VG_REGPARM(2)
void dh_handle_write ( Addr addr, UWord szB )
{
   tl_assert(clo_mode == Heap);

   Block* bk = find_Block_containing(addr);
   if (bk) {
      bk->writes_bytes += szB;
      if (bk->histoW)
         inc_histo_for_block(bk, addr, szB);
   }
}

// Helper called from instrumented code for a data read of 'szB' bytes
// at 'addr'.  Accesses outside any live block are ignored.
static VG_REGPARM(2)
void dh_handle_read ( Addr addr, UWord szB )
{
   tl_assert(clo_mode == Heap);

   Block* bk = find_Block_containing(addr);
   if (bk) {
      bk->reads_bytes += szB;
      if (bk->histoW)
         inc_histo_for_block(bk, addr, szB);
   }
}
// Handle reads and writes by syscalls (read == kernel
// reads user space, write == kernel writes user space).
// Assumes no such read or write spans a heap block
// boundary and so we can treat it just as one giant
// read or write.
static
void dh_handle_noninsn_read ( CorePart part, ThreadId tid, const HChar* s,
                              Addr base, SizeT size )
{
   tl_assert(clo_mode == Heap);

   switch (part) {
      case Vg_CoreSysCall:
         dh_handle_read(base, size);
         break;
      case Vg_CoreSysCallArgInMem:
         // Not attributed to any block.
         break;
      case Vg_CoreTranslate:
         // Instruction fetch; not a data access.
         break;
      default:
         tl_assert(0);
   }
}
873 static
874 void dh_handle_noninsn_read_asciiz(CorePart part, ThreadId tid, const HChar* s,
875 Addr str)
877 tl_assert(clo_mode == Heap);
879 tl_assert(part == Vg_CoreSysCall);
880 dh_handle_noninsn_read(part, tid, s, str, VG_(strlen)((const HChar*)str+1));
// Handle writes to user space performed by the kernel (syscalls) or by
// the core on behalf of the client (client requests, signal delivery).
static
void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
                               Addr base, SizeT size )
{
   tl_assert(clo_mode == Heap);

   switch (part) {
      case Vg_CoreSysCall:
      case Vg_CoreClientReq:
         dh_handle_write(base, size);
         break;
      case Vg_CoreSignal:
         // Signal frame writes land on the stack; not tracked.
         break;
      default:
         tl_assert(0);
   }
}
901 //------------------------------------------------------------//
902 //--- Instrumentation ---//
903 //------------------------------------------------------------//
/* Shorthands for building Vex IR expressions/statements below. */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define assign(_t, _e)           IRStmt_WrTmp((_t), (_e))
/* Append IR to 'sbOut' that adds 'n' to the 64-bit global
   g_curr_instrs via a plain load/add/store.  (No atomicity is needed:
   Valgrind serialises guest execution.) */
static
void add_counter_update(IRSB* sbOut, Int n)
{
   #if defined(VG_BIGENDIAN)
   # define END Iend_BE
   #elif defined(VG_LITTLEENDIAN)
   # define END Iend_LE
   #else
   # error "Unknown endianness"
   #endif
   // Add code to increment 'g_curr_instrs' by 'n', like this:
   //   WrTmp(t1, Load64(&g_curr_instrs))
   //   WrTmp(t2, Add64(RdTmp(t1), Const(n)))
   //   Store(&g_curr_instrs, t2)
   IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
   IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
   IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_curr_instrs );

   IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
   IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
   IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));

   addStmtToIRSB( sbOut, st1 );
   addStmtToIRSB( sbOut, st2 );
   addStmtToIRSB( sbOut, st3 );
}
/* Append IR to 'sbOut' that calls dh_handle_read/dh_handle_write for a
   memory access of 'szB' bytes at 'addr'.  The call is guarded so that
   accesses close to the stack pointer (which cannot be heap accesses)
   are skipped.  'goff_sp' is the guest-state offset of SP. */
static
void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
                 Int goff_sp)
{
   if (clo_mode != Heap) {
      // Only heap mode tracks individual accesses.
      return;
   }

   IRType       tyAddr = Ity_INVALID;
   const HChar* hName  = NULL;
   void*        hAddr  = NULL;
   IRExpr**     argv   = NULL;
   IRDirty*     di     = NULL;

   const Int THRESH = 4096 * 4; // somewhat arbitrary
   const Int rz_szB = VG_STACK_REDZONE_SZB;

   tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

   if (isWrite) {
      hName = "dh_handle_write";
      hAddr = &dh_handle_write;
   } else {
      hName = "dh_handle_read";
      hAddr = &dh_handle_read;
   }

   argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );

   /* Add the helper. */
   tl_assert(hName);
   tl_assert(hAddr);
   tl_assert(argv);
   di = unsafeIRDirty_0_N( 2/*regparms*/,
                           hName, VG_(fnptr_to_fnentry)( hAddr ),
                           argv );

   /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
      some arbitrary N.  If that fails then addr is in the range (SP -
      RZ .. SP + N - RZ).  If N is smallish (a page?) then we can say
      addr is within a page of SP and so can't possibly be a heap
      access, and so can be skipped. */
   IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));

   IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB(
      sbOut,
      assign(sp_minus_rz,
             tyAddr == Ity_I32
                ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
                : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
   );

   IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB(
      sbOut,
      assign(diff,
             tyAddr == Ity_I32
                ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
                : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
   );

   IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
   addStmtToIRSB(
      sbOut,
      assign(guard,
             tyAddr == Ity_I32
                ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
                : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
   );
   di->guard = mkexpr(guard);

   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
}
/* Main instrumentation entry point.  Walks the incoming superblock,
   counting guest instructions (folded into g_curr_instrs before every
   side exit and at the block end) and attaching read/write helpers to
   every memory-touching statement. */
static
IRSB* dh_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      const VexGuestLayout* layout,
                      const VexGuestExtents* vge,
                      const VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int        i, n = 0;
   IRSB*      sbOut;
   IRTypeEnv* tyenv = sbIn->tyenv;

   const Int goff_sp = layout->offset_SP;

   // We increment the instruction count in two places:
   // - just before any Ist_Exit statements;
   // - just before the IRSB's end.
   // In the former case, we zero 'n' and then continue instrumenting.

   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];

      if (!st || st->tag == Ist_NoOp) continue;

      switch (st->tag) {

         case Ist_IMark: {
            // One guest instruction starts here.
            n++;
            break;
         }

         case Ist_Exit: {
            if (n > 0) {
               // Add an increment before the Exit statement, then reset 'n'.
               add_counter_update(sbOut, n);
               n = 0;
            }
            break;
         }

         case Ist_WrTmp: {
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(data->Iex.Load.ty),
                            aexpr, goff_sp );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data  = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addMemEvent( sbOut, True/*isWrite*/,
                         sizeofIRType(typeOfIRExpr(tyenv, data)),
                         aexpr, goff_sp );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, False/*!isWrite*/,
                               dataSize, d->mAddr, goff_sp );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, True/*isWrite*/,
                               dataSize, d->mAddr, goff_sp );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            addMemEvent( sbOut, False/*!isWrite*/,
                         dataSize, cas->addr, goff_sp );
            addMemEvent( sbOut, True/*isWrite*/,
                         dataSize, cas->addr, goff_sp );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addMemEvent( sbOut, True/*isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            }
            break;
         }

         default:
            break;
      }

      // Pass the original statement through unchanged.
      addStmtToIRSB( sbOut, st );
   }

   if (n > 0) {
      // Add an increment before the SB end.
      add_counter_update(sbOut, n);
   }

   return sbOut;
}
1163 #undef binop
1164 #undef mkexpr
1165 #undef mkU32
1166 #undef mkU64
1167 #undef assign
1169 //------------------------------------------------------------//
1170 //--- Client requests ---//
1171 //------------------------------------------------------------//
1173 static Bool dh_handle_client_request(ThreadId tid, UWord* arg, UWord* ret)
1175 switch (arg[0]) {
1176 case VG_USERREQ__DHAT_AD_HOC_EVENT: {
1177 if (clo_mode != AdHoc) {
1178 return False;
1181 SizeT len = (SizeT)arg[1];
1183 // Only the ec and req_szB fields are used by intro_Block().
1184 Block bk;
1185 VG_(memset)(&bk, 0, sizeof(bk));
1186 bk.req_szB = len;
1187 bk.ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
1189 intro_Block(&bk);
1191 return True;
1194 case _VG_USERREQ__DHAT_COPY: {
1195 SizeT len = (SizeT)arg[1];
1197 if (clo_mode != Copy) {
1198 return False;
1201 // Only the ec and req_szB fields are used by intro_Block().
1202 Block bk;
1203 VG_(memset)(&bk, 0, sizeof(bk));
1204 bk.req_szB = len;
1205 bk.ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
1207 intro_Block(&bk);
1209 return True;
1212 default:
1213 VG_(message)(
1214 Vg_UserMsg,
1215 "Warning: unknown DHAT client request code %llx\n",
1216 (ULong)arg[0]
1218 return False;
1222 //------------------------------------------------------------//
1223 //--- Finalisation ---//
1224 //------------------------------------------------------------//
1226 // File format notes.
1228 // - The files are JSON, because it's a widely-used format and saves us having
1229 // to write a parser in dh_view.js.
1231 // - We use a comma-first style for the generated JSON. Comma-first style
1232 // moves the special case for arrays/objects from the last item to the
1233 // first. This helps in cases where you can't easily tell in advance the
1234 // size of arrays/objects, such as iterating over a WordFM (because
1235 // VG_(sizeFM) is O(n) rather than O(1)), and iterating over stack frames
1236 // using VG_(apply_ExeContext) in combination with an InlIpCursor.
1238 // - We use short field names and minimal whitespace to minimize file sizes.
1240 // Sample output:
1242 // {
1243 // // Version number of the format. Incremented on each
1244 // // backwards-incompatible change. A mandatory integer.
1245 // "dhatFileVersion": 2,
1247 // // The invocation mode. A mandatory, free-form string.
1248 // "mode": "heap",
1250 // // The verb used before above stack frames, i.e. "<verb> at {". A
1251 // // mandatory string.
1252 // "verb": "Allocated",
1254 // // Are block lifetimes recorded? Affects whether some other fields are
1255 // // present. A mandatory boolean.
1256 // "bklt": true,
1258 // // Are block accesses recorded? Affects whether some other fields are
1259 // // present. A mandatory boolean.
1260 // "bkacc": true,
1262 // // Byte/bytes/blocks-position units. Optional strings. "byte", "bytes",
1263 // // and "blocks" are the values used if these fields are omitted.
1264 // "bu": "byte", "bsu": "bytes", "bksu": "blocks",
1266 // // Time units (individual and 1,000,000x). Mandatory strings.
1267 // "tu": "instrs", "Mtu": "Minstr"
// // The "short-lived" time threshold, measured in "tu"s.
1270 // // - bklt=true: a mandatory integer.
1271 // // - bklt=false: omitted.
1272 // "tuth": 500,
1274 // // The executed command. A mandatory string.
1275 // "cmd": "date",
1277 // // The process ID. A mandatory integer.
1278 // "pid": 61129
1280 // // The time at the end of execution (t-end). A mandatory integer.
1281 // "te": 350682
1283 // // The time of the global max (t-gmax).
1284 // // - bklt=true: a mandatory integer.
1285 // // - bklt=false: omitted.
1286 // "tg": 331312,
1288 // // The program points. A mandatory array.
1289 // "pps": [
1290 // {
1291 // // Total bytes and blocks. Mandatory integers.
1292 // "tb": 5, "tbk": 1,
1294 // // Total lifetimes of all blocks allocated at this PP.
1295 // // - bklt=true: a mandatory integer.
1296 // // - bklt=false: omitted.
1297 // "tl": 274,
1299 // // The maximum bytes and blocks for this PP.
1300 // // - bklt=true: mandatory integers.
1301 // // - bklt=false: omitted.
1302 // "mb": 5, "mbk": 1,
1304 // // The bytes and blocks at t-gmax for this PP.
1305 // // - bklt=true: mandatory integers.
1306 // // - bklt=false: omitted.
1307 // "gb": 0, "gbk": 0,
1309 // // The bytes and blocks at t-end for this PP.
1310 // // - bklt=true: mandatory integers.
1311 // // - bklt=false: omitted.
1312 // "eb": 0, "ebk": 0,
1314 // // The reads and writes of blocks for this PP.
1315 // // - bkacc=true: mandatory integers.
1316 // // - bkacc=false: omitted.
1317 // "rb": 41, "wb": 5,
1319 // // The exact accesses of blocks for this PP. Only used when all
1320 // // allocations are the same size and sufficiently small. A negative
1321 // // element indicates run-length encoding of the following integer.
1322 // // E.g. `-3, 4` means "three 4s in a row".
1323 // // - bkacc=true: an optional array of integers.
1324 // // - bkacc=false: omitted.
1325 // "acc": [5, -3, 4, 2],
1327 // // Frames. Each element is an index into the "ftbl" array below.
1328 // // - All modes: A mandatory array of integers.
1329 // "fs": [1, 2, 3]
1330 // }
1331 // ],
1333 // // Frame table. A mandatory array of strings.
1334 // "ftbl": [
1335 // "[root]",
1336 // "0x4AA1D9F: _nl_normalize_codeset (l10nflist.c:332)",
1337 // "0x4A9B414: _nl_load_locale_from_archive (loadarchive.c:173)",
1338 // "0x4A9A2BE: _nl_find_locale (findlocale.c:153)"
1339 // ]
1340 // }
1342 static VgFile* fp;
1344 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
1346 // The frame table holds unique frames.
1347 static WordFM* frame_tbl = NULL;
1348 static UWord next_frame_n = 0;
// Comparison function for frame_tbl: keys are frame-description strings
// (stored as UWords), compared lexicographically.
static Word frame_cmp(UWord a, UWord b)
{
   return VG_(strcmp)((const HChar*)a, (const HChar*)b);
}
1355 static HChar hex_digit_to_ascii_char(UChar d)
1357 d = d & 0xf;
1358 return (d < 10) ? ('0' + d) : ('a' + (d - 10));
1361 // For JSON, we must escape double quote, backslash, and 0x00..0x1f.
1363 // Returns the original string if no escaping was required. Returns a pointer
1364 // to a static buffer if escaping was required. Therefore, the return value is
1365 // only valid until the next call to this function.
1366 static const HChar* json_escape(const HChar* s)
1368 static HChar* buf = NULL;
1369 static SizeT bufcap = 0;
1371 // Do we need any escaping?
1372 SizeT extra = 0;
1373 const HChar* p = s;
1374 while (*p) {
1375 UChar c = *p;
1376 if (c == '"' || c == '\\') {
1377 extra += 1;
1378 } else if (c <= 0x1f) {
1379 extra += 5;
1381 p++;
1383 SizeT len = p - s;
1385 if (extra == 0) {
1386 // No escaping needed.
1387 return s;
1390 // Escaping needed. (The +1 is for the NUL terminator.) Enlarge buf if
1391 // necessary.
1392 SizeT newcap = len + extra + 1;
1393 if (bufcap < newcap) {
1394 buf = VG_(realloc)("dh.json", buf, newcap);
1395 bufcap = newcap;
1398 p = s;
1399 HChar* q = buf;
1400 while (*p) {
1401 UChar c = *p;
1402 if (c == '"') {
1403 *q++ = '\\';
1404 *q++ = '"';
1405 } else if (c == '\\') {
1406 *q++ = '\\';
1407 *q++ = '\\';
1408 } else if (c <= 0x1f) {
1409 *q++ = '\\';
1410 *q++ = 'u';
1411 *q++ = '0';
1412 *q++ = '0';
1413 *q++ = hex_digit_to_ascii_char((c & 0x00f0) >> 4);
1414 *q++ = hex_digit_to_ascii_char(c & 0x000f);
1415 } else {
1416 *q++ = c;
1418 p++;
1420 *q = '\0';
1422 return buf;
// Callback for VG_(apply_ExeContext): emit the frame(s) at one IP as
// frame-table indices in a comma-first JSON array.  A single IP can expand
// to several frames when inlining info is present (hence the IIPC loop).
// 'opaque' points to a Bool that is True until the first element has been
// printed, selecting '[' vs ',' as the separator.  'n' (the frame's
// position within the ExeContext) is unused here.
static void write_PPInfo_frame(UInt n, DiEpoch ep, Addr ip, void* opaque)
{
   Bool* is_first = (Bool*)opaque;
   InlIPCursor* iipc = VG_(new_IIPC)(ep, ip);

   do {
      const HChar* buf = VG_(describe_IP)(ep, ip, iipc);

      // Skip entries in vg_replace_malloc.c (e.g. `malloc`, `calloc`,
      // `realloc`, `operator new`) because they're boring and clog up the
      // output.
      if (VG_(strstr)(buf, "vg_replace_malloc.c")) {
         // Note: in a do/while, `continue` jumps to the loop condition,
         // i.e. straight to VG_(next_IIPC), which is what we want.
         continue;
      }

      // If this description has been seen before, get its number. Otherwise,
      // give it a new number and put it in the table.
      UWord keyW = 0, valW = 0;
      UWord frame_n = 0;
      Bool found = VG_(lookupFM)(frame_tbl, &keyW, &valW, (UWord)buf);
      if (found) {
         //const HChar* str = (const HChar*)keyW;
         //tl_assert(0 == VG_(strcmp)(buf, str));
         frame_n = valW;
      } else {
         // `buf` is a static buffer, we must copy it.
         const HChar* str = VG_(strdup)("dh.frame_tbl.3", buf);
         frame_n = next_frame_n++;
         Bool present = VG_(addToFM)(frame_tbl, (UWord)str, frame_n);
         tl_assert(!present);
      }

      FP("%c%lu", *is_first ? '[' : ',', frame_n);
      *is_first = False;

   } while (VG_(next_IIPC)(iipc));

   VG_(delete_IIPC)(iipc);
}
// Emit one program point (PPInfo) as a JSON object, comma-first style.
// 'is_first' selects '[' (array opener) vs ',' as the prefix character.
// Most fields are only present in Heap mode; in the other modes we assert
// that all heap-only counters stayed at zero.
static void write_PPInfo(PPInfo* ppi, Bool is_first)
{
   FP(" %c{\"tb\":%llu,\"tbk\":%llu\n",
      is_first ? '[' : ',',
      ppi->total_bytes, ppi->total_blocks);

   if (clo_mode == Heap) {
      tl_assert(ppi->total_blocks >= ppi->max_blocks);
      tl_assert(ppi->total_bytes >= ppi->max_bytes);

      FP(" ,\"tl\":%llu\n",
         ppi->total_lifetimes_instrs);
      FP(" ,\"mb\":%llu,\"mbk\":%llu\n",
         ppi->max_bytes, ppi->max_blocks);
      FP(" ,\"gb\":%llu,\"gbk\":%llu\n",
         ppi->at_tgmax_bytes, ppi->at_tgmax_blocks);
      FP(" ,\"eb\":%llu,\"ebk\":%llu\n",
         ppi->curr_bytes, ppi->curr_blocks);
      FP(" ,\"rb\":%llu,\"wb\":%llu\n",
         ppi->reads_bytes, ppi->writes_bytes);

      if (ppi->histo && ppi->xsize_tag == Exactly) {
         FP(" ,\"acc\":[");

         // Simple run-length encoding: when N entries in a row have the same
         // value M, we print "-N,M". If there is just one in a row, we just
         // print "M". This reduces file size significantly.
         //
         // Starting with repval=0/reps=0 handles both possibilities for the
         // first entry: a zero extends the (empty) initial run, and a
         // non-zero ends it -- and because reps is still 0 at that point,
         // nothing is printed for the empty run.
         UShort repval = 0;
         Int reps = 0;
         for (UWord i = 0; i < ppi->xsize; i++) {
            UShort h = ppi->histo[i];
            if (repval == h) {
               // Continue current run.
               reps++;
            } else {
               // End of run; print it.
               if (reps == 1) {
                  FP("%u,", repval);
               } else if (reps > 1) {
                  FP("-%d,%u,", reps, repval);
               }
               reps = 1;
               repval = h;
            }
         }
         // Print the final run.
         if (reps == 1) {
            FP("%u", repval);
         } else if (reps > 1) {
            FP("-%d,%u", reps, repval);
         }

         FP("]\n");
      }
   } else {
      // Non-heap modes never update any of these; check that held.
      tl_assert(ppi->curr_bytes == 0);
      tl_assert(ppi->curr_blocks == 0);
      tl_assert(ppi->max_bytes == 0);
      tl_assert(ppi->max_blocks == 0);
      tl_assert(ppi->at_tgmax_bytes == 0);
      tl_assert(ppi->at_tgmax_blocks == 0);
      tl_assert(ppi->total_lifetimes_instrs == 0);
      tl_assert(ppi->freed_blocks == 0);
      tl_assert(ppi->reads_bytes == 0);
      tl_assert(ppi->writes_bytes == 0);
      tl_assert(ppi->xsize_tag == 0);
      tl_assert(ppi->xsize == 0);
      tl_assert(ppi->histo == NULL);
   }

   // The stack frames for this PP, as indices into the frame table.
   FP(" ,\"fs\":");
   Bool is_first_frame = True;
   VG_(apply_ExeContext)(write_PPInfo_frame, &is_first_frame, ppi->ec);
   FP("]\n");

   FP(" }\n");
}
1543 static void write_PPInfos(void)
1545 UWord keyW, valW;
1547 FP(",\"pps\":\n");
1549 VG_(initIterFM)(ppinfo);
1550 Bool is_first = True;
1551 while (VG_(nextIterFM)(ppinfo, &keyW, &valW)) {
1552 PPInfo* ppi = (PPInfo*)valW;
1553 tl_assert(ppi && ppi->ec == (ExeContext*)keyW);
1554 write_PPInfo(ppi, is_first);
1555 is_first = False;
1557 VG_(doneIterFM)(ppinfo);
1559 if (is_first) {
1560 // We didn't print any elements. This happens if ppinfo is empty.
1561 FP(" [\n");
1564 FP(" ]\n");
// Tool finalisation: harvest stats for still-live blocks, write the JSON
// profile to --dhat-out-file, and print a brief summary.  'exit_status' is
// unused.
static void dh_fini(Int exit_status)
{
   // This function does lots of allocations that it doesn't bother to free,
   // because execution is almost over anyway.

   UWord keyW, valW;

   // Total bytes might be at a possible peak.
   if (clo_mode == Heap) {
      check_for_peak();

      // Before printing statistics, we must harvest various stats (such as
      // lifetimes and accesses) for all the blocks that are still alive.
      VG_(initIterFM)( interval_tree );
      while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
         Block* bk = (Block*)keyW;
         tl_assert(valW == 0);
         tl_assert(bk);
         retire_Block(bk, False/*!because_freed*/);
      }
      VG_(doneIterFM)( interval_tree );

      // Stats.
      if (VG_(clo_stats)) {
         VG_(dmsg)(" dhat: find_Block_containing:\n");
         VG_(dmsg)(" found: %'lu (%'lu cached + %'lu uncached)\n",
                   stats__n_fBc_cached + stats__n_fBc_uncached,
                   stats__n_fBc_cached,
                   stats__n_fBc_uncached);
         VG_(dmsg)(" notfound: %'lu\n", stats__n_fBc_notfound);
         VG_(dmsg)("\n");
      }
   }

   // Create the frame table, and insert the special "[root]" node at index 0.
   frame_tbl = VG_(newFM)(VG_(malloc),
                          "dh.frame_tbl.1",
                          VG_(free),
                          frame_cmp);
   const HChar* root = VG_(strdup)("dh.frame_tbl.2", "[root]");
   Bool present = VG_(addToFM)(frame_tbl, (UWord)root, 0);
   tl_assert(!present);
   next_frame_n = 1;

   // Setup output filename. Nb: it's important to do this now, i.e. as late
   // as possible. If we do it at start-up and the program forks and the
   // output file format string contains a %p (pid) specifier, both the parent
   // and child will incorrectly write to the same file; this happened in
   // 3.3.0.
   HChar* dhat_out_file =
      VG_(expand_file_name)("--dhat-out-file", clo_dhat_out_file);

   fp = VG_(fopen)(dhat_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
                   VKI_S_IRUSR|VKI_S_IWUSR);
   if (!fp) {
      VG_(umsg)("error: can't open DHAT output file '%s'\n", dhat_out_file);
      VG_(free)(dhat_out_file);
      return;
   }

   // Write to data file.  See the "File format notes" above for the meaning
   // of each field.
   FP("{\"dhatFileVersion\":2\n");

   // The output mode, block booleans, and byte/block units.
   if (clo_mode == Heap) {
      FP(",\"mode\":\"heap\",\"verb\":\"Allocated\"\n");
      FP(",\"bklt\":true,\"bkacc\":true\n");
   } else if (clo_mode == Copy) {
      FP(",\"mode\":\"copy\",\"verb\":\"Copied\"\n");
      FP(",\"bklt\":false,\"bkacc\":false\n");
   } else if (clo_mode == AdHoc) {
      FP(",\"mode\":\"ad-hoc\",\"verb\":\"Occurred\"\n");
      FP(",\"bklt\":false,\"bkacc\":false\n");
      FP(",\"bu\":\"unit\",\"bsu\":\"units\",\"bksu\":\"events\"\n");
   } else {
      tl_assert(False);
   }

   // The time units.
   FP(",\"tu\":\"instrs\",\"Mtu\":\"Minstr\"\n");
   if (clo_mode == Heap) {
      FP(",\"tuth\":500\n");
   }

   // The command.
   const HChar* exe = VG_(args_the_exename);
   FP(",\"cmd\":\"%s", json_escape(exe));
   for (Word i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
      const HChar* arg = *(HChar**)VG_(indexXA)(VG_(args_for_client), i);
      FP(" %s", json_escape(arg));
   }
   FP("\"\n");

   // The PID.
   FP(",\"pid\":%d\n", VG_(getpid)());

   // Times.
   FP(",\"te\":%llu\n", g_curr_instrs);
   if (clo_mode == Heap) {
      FP(",\"tg\":%llu\n", g_tgmax_instrs);
   } else {
      tl_assert(g_tgmax_instrs == 0);
   }

   // APs.
   write_PPInfos();

   // Frame table.
   FP(",\"ftbl\":\n");

   // The frame table maps strings to numbers. We want to print it ordered by
   // numbers. So we create an array and fill it in from the frame table, then
   // print that.
   UWord n_frames = next_frame_n;
   const HChar** frames =
      VG_(malloc)("dh.frames", n_frames * sizeof(const HChar*));
   VG_(initIterFM)(frame_tbl);
   while (VG_(nextIterFM)(frame_tbl, &keyW, &valW)) {
      const HChar* str = (const HChar*)keyW;
      UWord n = valW;
      frames[n] = str;
   }
   VG_(doneIterFM)(frame_tbl);

   for (UWord i = 0; i < n_frames; i++) {
      FP(" %c\"%s\"\n", i == 0 ? '[' : ',', json_escape(frames[i]));
   }
   FP(" ]\n");
   VG_(free)(frames);

   FP("}\n");

   VG_(fclose)(fp);
   fp = NULL;

   if (VG_(clo_verbosity) == 0) {
      return;
   }

   // Print brief global stats.
   VG_(umsg)("Total: %'llu %s in %'llu %s\n",
             g_total_bytes, clo_mode == AdHoc ? "units" : "bytes",
             g_total_blocks, clo_mode == AdHoc ? "events" : "blocks");
   if (clo_mode == Heap) {
      VG_(umsg)("At t-gmax: %'llu bytes in %'llu blocks\n",
                g_max_bytes, g_max_blocks);
      VG_(umsg)("At t-end: %'llu bytes in %'llu blocks\n",
                g_curr_bytes, g_curr_blocks);
      VG_(umsg)("Reads: %'llu bytes\n", g_reads_bytes);
      VG_(umsg)("Writes: %'llu bytes\n", g_writes_bytes);
   } else {
      // Non-heap modes never update these counters; check that held.
      tl_assert(g_max_bytes == 0);
      tl_assert(g_max_blocks == 0);
      tl_assert(g_curr_bytes == 0);
      tl_assert(g_curr_blocks == 0);
      tl_assert(g_reads_bytes == 0);
      tl_assert(g_writes_bytes == 0);
   }

   // Print a how-to-view-the-profile hint.
   VG_(umsg)("\n");
   VG_(umsg)("To view the resulting profile, open\n");
   VG_(umsg)(" file://%s/%s\n", DHAT_VIEW_DIR, "dh_view.html");
   VG_(umsg)("in a web browser, click on \"Load...\", "
             "and then select the file\n");
   VG_(umsg)(" %s\n", dhat_out_file);
   VG_(umsg)("The text at the bottom explains the abbreviations used in the "
             "output.\n");

   VG_(free)(dhat_out_file);
}
1739 //------------------------------------------------------------//
1740 //--- Initialisation ---//
1741 //------------------------------------------------------------//
// Called after command-line processing.  Non-instruction memory-access
// callbacks (e.g. from syscalls) are only needed in Heap mode, where block
// reads/writes are profiled.
static void dh_post_clo_init(void)
{
   if (clo_mode == Heap) {
      VG_(track_pre_mem_read) ( dh_handle_noninsn_read );
      VG_(track_pre_mem_read_asciiz) ( dh_handle_noninsn_read_asciiz );
      VG_(track_post_mem_write) ( dh_handle_noninsn_write );
   }
}
// Tool registration entry point, run before command-line processing:
// registers tool details, callbacks, and needs with the Valgrind core, and
// creates the two global WordFMs (interval_tree, ppinfo).
static void dh_pre_clo_init(void)
{
   VG_(details_name) ("DHAT");
   VG_(details_version) (NULL);
   VG_(details_description) ("a dynamic heap analysis tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2010-2018, and GNU GPL'd, by Mozilla Foundation");
   VG_(details_bug_reports_to) (VG_BUGS_TO);

   // Basic functions.
   VG_(basic_tool_funcs) (dh_post_clo_init,
                          dh_instrument,
                          dh_fini);

   // Needs.
   VG_(needs_libc_freeres)();
   VG_(needs_cxx_freeres)();
   VG_(needs_command_line_options)(dh_process_cmd_line_option,
                                   dh_print_usage,
                                   dh_print_debug_usage);
   VG_(needs_client_requests) (dh_handle_client_request);
// VG_(needs_sanity_checks) (dh_cheap_sanity_check,
//                           dh_expensive_sanity_check);
   // The final 0 is the client redzone size.
   VG_(needs_malloc_replacement)(dh_malloc,
                                 dh___builtin_new,
                                 dh___builtin_vec_new,
                                 dh_memalign,
                                 dh_calloc,
                                 dh_free,
                                 dh___builtin_delete,
                                 dh___builtin_vec_delete,
                                 dh_realloc,
                                 dh_malloc_usable_size,
                                 0 );

   // These must not have been created yet.
   tl_assert(!interval_tree);
   tl_assert(!fbc_cache0);
   tl_assert(!fbc_cache1);

   interval_tree = VG_(newFM)( VG_(malloc),
                               "dh.interval_tree.1",
                               VG_(free),
                               interval_tree_Cmp );

   ppinfo = VG_(newFM)( VG_(malloc),
                        "dh.ppinfo.1",
                        VG_(free),
                        NULL/*unboxedcmp*/ );
}
1802 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
1804 //--------------------------------------------------------------------//
1805 //--- end dh_main.c ---//
1806 //--------------------------------------------------------------------//