mc_translate.c: enable further uses of DLexpensive for scalar EQ/NE comparisons
[valgrind.git] / dhat / dh_main.c
blob69e6fb6d01026b990d5e3287798eb0c549197fd8
2 //--------------------------------------------------------------------//
3 //--- DHAT: a Dynamic Heap Analysis Tool dh_main.c ---//
4 //--------------------------------------------------------------------//
6 /*
7 This file is part of DHAT, a Valgrind tool for profiling the
8 heap usage of programs.
10 Copyright (C) 2010-2018 Mozilla Foundation
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 /* Contributed by Julian Seward <jseward@acm.org> */
30 #include "pub_tool_basics.h"
31 #include "pub_tool_clientstate.h"
32 #include "pub_tool_clreq.h"
33 #include "pub_tool_libcbase.h"
34 #include "pub_tool_libcassert.h"
35 #include "pub_tool_libcfile.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_libcproc.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_options.h"
41 #include "pub_tool_replacemalloc.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
45 #include "dhat.h"
// Blocks larger than this many bytes get no per-byte access histogram.
#define HISTOGRAM_SIZE_LIMIT 1024
49 //------------------------------------------------------------//
50 //--- Globals ---//
51 //------------------------------------------------------------//
53 // Values for the entire run.
54 static ULong g_total_blocks = 0;
55 static ULong g_total_bytes = 0;
57 // Current values. g_curr_blocks and g_curr_bytes are only used with
58 // clo_mode=Heap.
59 static ULong g_curr_blocks = 0;
60 static ULong g_curr_bytes = 0;
61 static ULong g_curr_instrs = 0; // incremented from generated code
63 // Values at the global max, i.e. when g_curr_bytes peaks.
64 // Only used with clo_mode=Heap.
65 static ULong g_max_blocks = 0;
66 static ULong g_max_bytes = 0;
68 // Time of the global max.
69 static ULong g_tgmax_instrs = 0;
71 // Values for the entire run. Updated each time a block is retired.
72 // Only used with clo_mode=Heap.
73 static ULong g_reads_bytes = 0;
74 static ULong g_writes_bytes = 0;
76 //------------------------------------------------------------//
77 //--- Command line args ---//
78 //------------------------------------------------------------//
80 typedef enum { Heap=55, Copy, AdHoc } ProfileKind;
82 static ProfileKind clo_mode = Heap;
84 static const HChar* clo_dhat_out_file = "dhat.out.%p";
86 static Bool dh_process_cmd_line_option(const HChar* arg)
88 if VG_STR_CLO(arg, "--dhat-out-file", clo_dhat_out_file) {
90 } else if (VG_XACT_CLO(arg, "--mode=heap", clo_mode, Heap)) {
91 } else if (VG_XACT_CLO(arg, "--mode=copy", clo_mode, Copy)) {
92 } else if (VG_XACT_CLO(arg, "--mode=ad-hoc", clo_mode, AdHoc)) {
94 } else {
95 return VG_(replacement_malloc_process_cmd_line_option)(arg);
98 return True;
101 static void dh_print_usage(void)
103 VG_(printf)(
104 " --dhat-out-file=<file> output file name [dhat.out.%%p]\n"
105 " --mode=heap|copy|ad-hoc profiling mode\n"
109 static void dh_print_debug_usage(void)
111 VG_(printf)(
112 " (none)\n"
116 //------------------------------------------------------------//
117 //--- an Interval Tree of live blocks ---//
118 //------------------------------------------------------------//
120 /* Tracks information about live blocks. */
121 typedef
122 struct {
123 Addr payload;
124 SizeT req_szB;
125 ExeContext* ec; /* allocation ec */
126 ULong allocd_at; /* instruction number */
127 ULong reads_bytes;
128 ULong writes_bytes;
129 /* Approx histogram, one byte per payload byte. Counts latch up
130 therefore at 0xFFFF. Can be NULL if the block is resized or if
131 the block is larger than HISTOGRAM_SIZE_LIMIT. */
132 UShort* histoW; /* [0 .. req_szB-1] */
134 Block;
136 /* May not contain zero-sized blocks. May not contain
137 overlapping blocks. */
138 static WordFM* interval_tree = NULL; /* WordFM* Block* void */
140 /* Here's the comparison function. Since the tree is required
141 to contain non-zero sized, non-overlapping blocks, it's good
142 enough to consider any overlap as a match. */
143 static Word interval_tree_Cmp ( UWord k1, UWord k2 )
145 Block* b1 = (Block*)k1;
146 Block* b2 = (Block*)k2;
147 tl_assert(b1->req_szB > 0);
148 tl_assert(b2->req_szB > 0);
149 if (b1->payload + b1->req_szB <= b2->payload) return -1;
150 if (b2->payload + b2->req_szB <= b1->payload) return 1;
151 return 0;
154 // 2-entry cache for find_Block_containing
155 static Block* fbc_cache0 = NULL;
156 static Block* fbc_cache1 = NULL;
158 static UWord stats__n_fBc_cached = 0;
159 static UWord stats__n_fBc_uncached = 0;
160 static UWord stats__n_fBc_notfound = 0;
162 static Block* find_Block_containing ( Addr a )
164 tl_assert(clo_mode == Heap);
166 if (LIKELY(fbc_cache0
167 && fbc_cache0->payload <= a
168 && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
169 // found at 0
170 stats__n_fBc_cached++;
171 return fbc_cache0;
173 if (LIKELY(fbc_cache1
174 && fbc_cache1->payload <= a
175 && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
176 // found at 1; swap 0 and 1
177 Block* tmp = fbc_cache0;
178 fbc_cache0 = fbc_cache1;
179 fbc_cache1 = tmp;
180 stats__n_fBc_cached++;
181 return fbc_cache0;
183 Block fake;
184 fake.payload = a;
185 fake.req_szB = 1;
186 UWord foundkey = 1;
187 UWord foundval = 1;
188 Bool found = VG_(lookupFM)( interval_tree,
189 &foundkey, &foundval, (UWord)&fake );
190 if (!found) {
191 stats__n_fBc_notfound++;
192 return NULL;
194 tl_assert(foundval == 0); // we don't store vals in the interval tree
195 tl_assert(foundkey != 1);
196 Block* res = (Block*)foundkey;
197 tl_assert(res != &fake);
198 // put at the top position
199 fbc_cache1 = fbc_cache0;
200 fbc_cache0 = res;
201 stats__n_fBc_uncached++;
202 return res;
205 // delete a block; asserts if not found. (viz, 'a' must be
206 // known to be present.)
207 static void delete_Block_starting_at ( Addr a )
209 tl_assert(clo_mode == Heap);
211 Block fake;
212 fake.payload = a;
213 fake.req_szB = 1;
214 Bool found = VG_(delFromFM)( interval_tree,
215 NULL, NULL, (Addr)&fake );
216 tl_assert(found);
217 fbc_cache0 = fbc_cache1 = NULL;
220 //------------------------------------------------------------//
221 //--- a FM of allocation points (APs) ---//
222 //------------------------------------------------------------//
224 typedef
225 struct {
226 // The program point that we're summarising stats for.
227 ExeContext* ec;
229 // Total number of blocks and bytes allocated by this PP.
230 ULong total_blocks;
231 ULong total_bytes;
233 // The current number of blocks and bytes live for this PP.
234 // Only used with clo_mode=Heap.
235 ULong curr_blocks;
236 ULong curr_bytes;
238 // Values at the PP max, i.e. when this PP's curr_bytes peaks.
239 // Only used with clo_mode=Heap.
240 ULong max_blocks; // Blocks at the PP max.
241 ULong max_bytes; // The PP max, measured in bytes.
243 // Values at the global max.
244 // Only used with clo_mode=Heap.
245 ULong at_tgmax_blocks;
246 ULong at_tgmax_bytes;
248 // Total lifetimes of all blocks allocated by this PP. Includes blocks
249 // explicitly freed and blocks implicitly freed at termination.
250 // Only used with clo_mode=Heap.
251 ULong total_lifetimes_instrs;
253 // Number of blocks freed by this PP. (Only used in assertions.)
254 // Only used with clo_mode=Heap.
255 ULong freed_blocks;
257 // Total number of reads and writes in all blocks allocated
258 // by this PP. Only used with clo_mode=Heap.
259 ULong reads_bytes;
260 ULong writes_bytes;
262 /* Histogram information. We maintain a histogram aggregated for
263 all retiring Blocks allocated by this PP, but only if:
264 - this PP has only ever allocated objects of one size
265 - that size is <= HISTOGRAM_SIZE_LIMIT
266 What we need therefore is a mechanism to see if this PP
267 has only ever allocated blocks of one size.
269 3 states:
270 Unknown because no retirement yet
271 Exactly xsize all retiring blocks are of this size
272 Mixed multiple different sizes seen
274 Only used with clo_mode=Heap.
276 enum { Unknown=999, Exactly, Mixed } xsize_tag;
277 SizeT xsize;
278 UInt* histo; /* [0 .. xsize-1] */
280 PPInfo;
282 /* maps ExeContext*'s to PPInfo*'s. Note that the keys must match the
283 .ec field in the values. */
284 static WordFM* ppinfo = NULL; /* WordFM* ExeContext* PPInfo* */
286 // Are we at peak memory? If so, update at_tgmax_blocks and at_tgmax_bytes in
287 // all PPInfos. Note that this is moderately expensive so we avoid calling it
288 // on every allocation.
289 static void check_for_peak(void)
291 tl_assert(clo_mode == Heap);
293 if (g_curr_bytes == g_max_bytes) {
294 // It's a peak. (If there are multiple equal peaks we record the latest
295 // one.)
296 UWord keyW, valW;
297 VG_(initIterFM)(ppinfo);
298 while (VG_(nextIterFM)(ppinfo, &keyW, &valW)) {
299 PPInfo* ppi = (PPInfo*)valW;
300 tl_assert(ppi && ppi->ec == (ExeContext*)keyW);
301 ppi->at_tgmax_blocks = ppi->curr_blocks;
302 ppi->at_tgmax_bytes = ppi->curr_bytes;
304 VG_(doneIterFM)(ppinfo);
308 /* 'bk' is being introduced (has just been allocated). Find the
309 relevant PPInfo entry for it, or create one, based on the block's
310 allocation EC. Then, update the PPInfo to the extent that we
311 actually can, to reflect the allocation. */
312 static void intro_Block(Block* bk)
314 tl_assert(bk);
315 tl_assert(bk->ec);
317 PPInfo* ppi = NULL;
318 UWord keyW = 0;
319 UWord valW = 0;
320 Bool found = VG_(lookupFM)( ppinfo,
321 &keyW, &valW, (UWord)bk->ec );
322 if (found) {
323 ppi = (PPInfo*)valW;
324 tl_assert(keyW == (UWord)bk->ec);
325 } else {
326 ppi = VG_(malloc)( "dh.intro_Block.1", sizeof(PPInfo) );
327 VG_(memset)(ppi, 0, sizeof(*ppi));
328 ppi->ec = bk->ec;
329 Bool present = VG_(addToFM)( ppinfo,
330 (UWord)bk->ec, (UWord)ppi );
331 tl_assert(!present);
332 if (clo_mode == Heap) {
333 // histo stuff
334 tl_assert(ppi->freed_blocks == 0);
335 ppi->xsize_tag = Unknown;
336 ppi->xsize = 0;
337 if (0) VG_(printf)("ppi %p --> Unknown\n", ppi);
341 tl_assert(ppi->ec == bk->ec);
343 // Update global stats and PPInfo stats.
345 g_total_blocks++;
346 g_total_bytes += bk->req_szB;
348 ppi->total_blocks++;
349 ppi->total_bytes += bk->req_szB;
351 if (clo_mode == Heap) {
352 g_curr_blocks++;
353 g_curr_bytes += bk->req_szB;
355 ppi->curr_blocks++;
356 ppi->curr_bytes += bk->req_szB;
358 // The use of `>=` rather than `>` means that if there are multiple equal
359 // peaks we record the latest one, like `check_for_peak` does.
360 if (g_curr_bytes >= g_max_bytes) {
361 g_max_blocks = g_curr_blocks;
362 g_max_bytes = g_curr_bytes;
363 g_tgmax_instrs = g_curr_instrs;
365 ppi->max_blocks = ppi->curr_blocks;
366 ppi->max_bytes = ppi->curr_bytes;
371 /* 'bk' is retiring (being freed). Find the relevant PPInfo entry for
372 it, which must already exist. Then, fold info from 'bk' into that
373 entry. 'because_freed' is True if the block is retiring because
374 the client has freed it. If it is False then the block is retiring
375 because the program has finished, in which case we want to skip the
376 updates of the total blocks live etc for this PP, but still fold in
377 the access counts and histo data that have so far accumulated for
378 the block. */
379 static void retire_Block(Block* bk, Bool because_freed)
381 tl_assert(clo_mode == Heap);
382 tl_assert(bk);
383 tl_assert(bk->ec);
385 PPInfo* ppi = NULL;
386 UWord keyW = 0;
387 UWord valW = 0;
388 Bool found = VG_(lookupFM)( ppinfo,
389 &keyW, &valW, (UWord)bk->ec );
390 tl_assert(found);
391 ppi = (PPInfo*)valW;
392 tl_assert(ppi->ec == bk->ec);
394 // update stats following this free.
395 if (0)
396 VG_(printf)("ec %p ppi->c_by_l %llu bk->rszB %llu\n",
397 bk->ec, ppi->curr_bytes, (ULong)bk->req_szB);
399 if (because_freed) {
400 // Total bytes is coming down from a possible peak.
401 check_for_peak();
403 // Then update global stats.
404 tl_assert(g_curr_blocks >= 1);
405 tl_assert(g_curr_bytes >= bk->req_szB);
406 g_curr_blocks--;
407 g_curr_bytes -= bk->req_szB;
409 // Then update PPInfo stats.
410 tl_assert(ppi->curr_blocks >= 1);
411 tl_assert(ppi->curr_bytes >= bk->req_szB);
412 ppi->curr_blocks--;
413 ppi->curr_bytes -= bk->req_szB;
415 ppi->freed_blocks++;
418 tl_assert(bk->allocd_at <= g_curr_instrs);
419 ppi->total_lifetimes_instrs += (g_curr_instrs - bk->allocd_at);
421 // access counts
422 ppi->reads_bytes += bk->reads_bytes;
423 ppi->writes_bytes += bk->writes_bytes;
424 g_reads_bytes += bk->reads_bytes;
425 g_writes_bytes += bk->writes_bytes;
427 // histo stuff. First, do state transitions for xsize/xsize_tag.
428 switch (ppi->xsize_tag) {
430 case Unknown:
431 tl_assert(ppi->xsize == 0);
432 tl_assert(ppi->freed_blocks == 1 || ppi->freed_blocks == 0);
433 tl_assert(!ppi->histo);
434 ppi->xsize_tag = Exactly;
435 ppi->xsize = bk->req_szB;
436 if (0) VG_(printf)("ppi %p --> Exactly(%lu)\n", ppi, ppi->xsize);
437 // and allocate the histo
438 if (bk->histoW) {
439 ppi->histo = VG_(malloc)("dh.retire_Block.1",
440 ppi->xsize * sizeof(UInt));
441 VG_(memset)(ppi->histo, 0, ppi->xsize * sizeof(UInt));
443 break;
445 case Exactly:
446 //tl_assert(ppi->freed_blocks > 1);
447 if (bk->req_szB != ppi->xsize) {
448 if (0) VG_(printf)("ppi %p --> Mixed(%lu -> %lu)\n",
449 ppi, ppi->xsize, bk->req_szB);
450 ppi->xsize_tag = Mixed;
451 ppi->xsize = 0;
452 // deallocate the histo, if any
453 if (ppi->histo) {
454 VG_(free)(ppi->histo);
455 ppi->histo = NULL;
458 break;
460 case Mixed:
461 //tl_assert(ppi->freed_blocks > 1);
462 break;
464 default:
465 tl_assert(0);
468 // See if we can fold the histo data from this block into
469 // the data for the PP.
470 if (ppi->xsize_tag == Exactly && ppi->histo && bk->histoW) {
471 tl_assert(ppi->xsize == bk->req_szB);
472 UWord i;
473 for (i = 0; i < ppi->xsize; i++) {
474 // FIXME: do something better in case of overflow of ppi->histo[..]
475 // Right now, at least don't let it overflow/wrap around
476 if (ppi->histo[i] <= 0xFFFE0000)
477 ppi->histo[i] += (UInt)bk->histoW[i];
479 if (0) VG_(printf)("fold in, PP = %p\n", ppi);
482 #if 0
483 if (bk->histoB) {
484 VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
485 UWord i;
486 for (i = 0; i < bk->req_szB; i++)
487 VG_(printf)("%u ", (UInt)bk->histoB[i]);
488 VG_(printf)("\n");
489 } else {
490 VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
492 #endif
495 /* This handles block resizing. When a block with PP 'ec' has a
496 size change of 'delta', call here to update the PPInfo. */
497 static void resize_Block(ExeContext* ec, SizeT old_req_szB, SizeT new_req_szB)
499 tl_assert(clo_mode == Heap);
501 Long delta = (Long)new_req_szB - (Long)old_req_szB;
502 PPInfo* ppi = NULL;
503 UWord keyW = 0;
504 UWord valW = 0;
505 Bool found = VG_(lookupFM)( ppinfo,
506 &keyW, &valW, (UWord)ec );
508 tl_assert(found);
509 ppi = (PPInfo*)valW;
510 tl_assert(ppi->ec == ec);
512 if (delta < 0) {
513 tl_assert(ppi->curr_bytes >= -delta);
514 tl_assert(g_curr_bytes >= -delta);
516 // Total bytes might be coming down from a possible peak.
517 check_for_peak();
520 // Note: we treat realloc() like malloc() + free() for total counts, i.e. we
521 // increment total_blocks by 1 and increment total_bytes by new_req_szB.
523 // A reasonable alternative would be to leave total_blocks unchanged and
524 // increment total_bytes by delta (but only if delta is positive). But then
525 // calls to realloc wouldn't be counted towards the total_blocks count,
526 // which is undesirable.
528 // Update global stats and PPInfo stats.
530 g_total_blocks++;
531 g_total_bytes += new_req_szB;
533 ppi->total_blocks++;
534 ppi->total_bytes += new_req_szB;
536 g_curr_blocks += 0; // unchanged
537 g_curr_bytes += delta;
539 ppi->curr_blocks += 0; // unchanged
540 ppi->curr_bytes += delta;
542 // The use of `>=` rather than `>` means that if there are multiple equal
543 // peaks we record the latest one, like `check_for_peak` does.
544 if (g_curr_bytes >= g_max_bytes) {
545 g_max_blocks = g_curr_blocks;
546 g_max_bytes = g_curr_bytes;
547 g_tgmax_instrs = g_curr_instrs;
549 ppi->max_blocks = ppi->curr_blocks;
550 ppi->max_bytes = ppi->curr_bytes;
554 //------------------------------------------------------------//
555 //--- update both Block and PPInfos after {m,re}alloc/free ---//
556 //------------------------------------------------------------//
558 static
559 void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
560 Bool is_zeroed )
562 tl_assert(p == NULL); // don't handle custom allocators right now
563 SizeT actual_szB;
565 if ((SSizeT)req_szB < 0) return NULL;
567 if (req_szB == 0) {
568 req_szB = 1; /* can't allow zero-sized blocks in the interval tree */
571 // Allocate and zero if necessary
572 if (!p) {
573 p = VG_(cli_malloc)( req_alignB, req_szB );
574 if (!p) {
575 return NULL;
577 if (is_zeroed) VG_(memset)(p, 0, req_szB);
578 actual_szB = VG_(cli_malloc_usable_size)(p);
579 tl_assert(actual_szB >= req_szB);
582 if (clo_mode != Heap) {
583 return p;
586 // Make new Block, add to interval_tree.
587 Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
588 bk->payload = (Addr)p;
589 bk->req_szB = req_szB;
590 bk->ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
591 bk->allocd_at = g_curr_instrs;
592 bk->reads_bytes = 0;
593 bk->writes_bytes = 0;
594 // Set up histogram array, if the block isn't too large.
595 bk->histoW = NULL;
596 if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
597 bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
598 VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
601 Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
602 tl_assert(!present);
603 fbc_cache0 = fbc_cache1 = NULL;
605 intro_Block(bk);
607 return p;
610 static
611 void die_block ( void* p )
613 VG_(cli_free)(p);
615 if (clo_mode != Heap) {
616 return;
619 Block* bk = find_Block_containing( (Addr)p );
620 if (!bk) {
621 return; // bogus free
624 tl_assert(bk->req_szB > 0);
625 // assert the block finder is behaving sanely
626 tl_assert(bk->payload <= (Addr)p);
627 tl_assert( (Addr)p < bk->payload + bk->req_szB );
629 if (bk->payload != (Addr)p) {
630 return; // bogus free
633 retire_Block(bk, True/*because_freed*/);
635 delete_Block_starting_at( bk->payload );
636 if (bk->histoW) {
637 VG_(free)( bk->histoW );
638 bk->histoW = NULL;
640 VG_(free)( bk );
643 static
644 void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
646 void* p_new = NULL;
648 tl_assert(new_req_szB > 0); // map 0 to 1
650 if (clo_mode != Heap) {
651 SizeT old_actual_szB = VG_(cli_malloc_usable_size)(p_old);
652 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
653 if (!p_new) {
654 return NULL;
656 VG_(memmove)(p_new, p_old, VG_MIN(old_actual_szB, new_req_szB));
657 VG_(cli_free)(p_old);
658 return p_new;
661 // Find the old block.
662 Block* bk = find_Block_containing( (Addr)p_old );
663 if (!bk) {
664 return NULL; // bogus realloc
667 tl_assert(bk->req_szB > 0);
668 // Assert the block finder is behaving sanely.
669 tl_assert(bk->payload <= (Addr)p_old);
670 tl_assert( (Addr)p_old < bk->payload + bk->req_szB );
672 if (bk->payload != (Addr)p_old) {
673 return NULL; // bogus realloc
676 // Keeping the histogram alive in any meaningful way across
677 // block resizing is too darn complicated. Just throw it away.
678 if (bk->histoW) {
679 VG_(free)(bk->histoW);
680 bk->histoW = NULL;
683 // Actually do the allocation, if necessary.
684 if (new_req_szB <= bk->req_szB) {
685 // New size is smaller or same; block not moved.
686 resize_Block(bk->ec, bk->req_szB, new_req_szB);
687 bk->req_szB = new_req_szB;
689 // Update reads/writes for the implicit copy. Even though we didn't
690 // actually do a copy, we act like we did, to match up with the fact
691 // that we treat this as an additional allocation.
692 bk->reads_bytes += new_req_szB;
693 bk->writes_bytes += new_req_szB;
695 p_new = p_old;
697 } else {
698 // New size is bigger; make new block, copy shared contents, free old.
699 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
700 if (!p_new) {
701 // Nb: if realloc fails, NULL is returned but the old block is not
702 // touched. What an awful function.
703 return NULL;
705 tl_assert(p_new != p_old);
707 VG_(memcpy)(p_new, p_old, bk->req_szB);
708 VG_(cli_free)(p_old);
710 // Since the block has moved, we need to re-insert it into the
711 // interval tree at the new place. Do this by removing
712 // and re-adding it.
713 delete_Block_starting_at( (Addr)p_old );
714 // Now 'bk' is no longer in the tree, but the Block itself
715 // is still alive.
717 // Update reads/writes for the copy.
718 bk->reads_bytes += bk->req_szB;
719 bk->writes_bytes += bk->req_szB;
721 // Update the metadata.
722 resize_Block(bk->ec, bk->req_szB, new_req_szB);
723 bk->payload = (Addr)p_new;
724 bk->req_szB = new_req_szB;
726 // And re-add it to the interval tree.
727 Bool present
728 = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
729 tl_assert(!present);
730 fbc_cache0 = fbc_cache1 = NULL;
733 return p_new;
736 //------------------------------------------------------------//
737 //--- malloc() et al replacement wrappers ---//
738 //------------------------------------------------------------//
740 static void* dh_malloc ( ThreadId tid, SizeT szB )
742 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
745 static void* dh___builtin_new ( ThreadId tid, SizeT szB )
747 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
750 static void* dh___builtin_new_aligned ( ThreadId tid, SizeT szB, SizeT alignB )
752 return new_block( tid, NULL, szB, alignB, /*is_zeroed*/False );
755 static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
757 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
760 static void* dh___builtin_vec_new_aligned ( ThreadId tid, SizeT szB, SizeT alignB )
762 return new_block( tid, NULL, szB, alignB, /*is_zeroed*/False );
765 static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
767 return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
770 static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
772 return new_block( tid, NULL, szB, alignB, False );
775 static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
777 die_block(p);
780 static void dh___builtin_delete ( ThreadId tid, void* p )
782 die_block(p);
785 static void dh___builtin_delete_aligned ( ThreadId tid, void* p, SizeT align )
787 die_block(p);
790 static void dh___builtin_vec_delete ( ThreadId tid, void* p )
792 die_block(p);
795 static void dh___builtin_vec_delete_aligned ( ThreadId tid, void* p, SizeT align )
797 die_block(p);
800 static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
802 if (p_old == NULL) {
803 return dh_malloc(tid, new_szB);
805 if (new_szB == 0) {
806 dh_free(tid, p_old);
807 return NULL;
809 return renew_block(tid, p_old, new_szB);
812 static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
814 if (clo_mode != Heap) {
815 return VG_(cli_malloc_usable_size)(p);
818 Block* bk = find_Block_containing( (Addr)p );
819 return bk ? bk->req_szB : 0;
822 //------------------------------------------------------------//
823 //--- memory references ---//
824 //------------------------------------------------------------//
826 static
827 void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
829 UWord i, offMin, offMax1;
830 offMin = addr - bk->payload;
831 tl_assert(offMin < bk->req_szB);
832 offMax1 = offMin + szB;
833 if (offMax1 > bk->req_szB)
834 offMax1 = bk->req_szB;
835 //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB);
836 for (i = offMin; i < offMax1; i++) {
837 UShort n = bk->histoW[i];
838 if (n < 0xFFFF) n++;
839 bk->histoW[i] = n;
843 static VG_REGPARM(2)
844 void dh_handle_write ( Addr addr, UWord szB )
846 tl_assert(clo_mode == Heap);
848 Block* bk = find_Block_containing(addr);
849 if (bk) {
850 bk->writes_bytes += szB;
851 if (bk->histoW)
852 inc_histo_for_block(bk, addr, szB);
856 static VG_REGPARM(2)
857 void dh_handle_read ( Addr addr, UWord szB )
859 tl_assert(clo_mode == Heap);
861 Block* bk = find_Block_containing(addr);
862 if (bk) {
863 bk->reads_bytes += szB;
864 if (bk->histoW)
865 inc_histo_for_block(bk, addr, szB);
869 // Handle reads and writes by syscalls (read == kernel
870 // reads user space, write == kernel writes user space).
871 // Assumes no such read or write spans a heap block
872 // boundary and so we can treat it just as one giant
873 // read or write.
874 static
875 void dh_handle_noninsn_read ( CorePart part, ThreadId tid, const HChar* s,
876 Addr base, SizeT size )
878 tl_assert(clo_mode == Heap);
880 switch (part) {
881 case Vg_CoreSysCall:
882 dh_handle_read(base, size);
883 break;
884 case Vg_CoreSysCallArgInMem:
885 break;
886 case Vg_CoreTranslate:
887 break;
888 default:
889 tl_assert(0);
893 static
894 void dh_handle_noninsn_read_asciiz(CorePart part, ThreadId tid, const HChar* s,
895 Addr str)
897 tl_assert(clo_mode == Heap);
899 tl_assert(part == Vg_CoreSysCall);
900 dh_handle_noninsn_read(part, tid, s, str, VG_(strlen)((const HChar*)str+1));
903 static
904 void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
905 Addr base, SizeT size )
907 tl_assert(clo_mode == Heap);
909 switch (part) {
910 case Vg_CoreSysCall:
911 case Vg_CoreClientReq:
912 dh_handle_write(base, size);
913 break;
914 case Vg_CoreSignal:
915 break;
916 default:
917 tl_assert(0);
//------------------------------------------------------------//
//--- Instrumentation                                      ---//
//------------------------------------------------------------//

// Short-hand IR construction helpers; #undef'd again after
// dh_instrument.
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define assign(_t, _e)           IRStmt_WrTmp((_t), (_e))
931 static
932 void add_counter_update(IRSB* sbOut, Int n)
934 #if defined(VG_BIGENDIAN)
935 # define END Iend_BE
936 #elif defined(VG_LITTLEENDIAN)
937 # define END Iend_LE
938 #else
939 # error "Unknown endianness"
940 #endif
941 // Add code to increment 'g_curr_instrs' by 'n', like this:
942 // WrTmp(t1, Load64(&g_curr_instrs))
943 // WrTmp(t2, Add64(RdTmp(t1), Const(n)))
944 // Store(&g_curr_instrs, t2)
945 IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
946 IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
947 IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_curr_instrs );
949 IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
950 IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
951 IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));
953 addStmtToIRSB( sbOut, st1 );
954 addStmtToIRSB( sbOut, st2 );
955 addStmtToIRSB( sbOut, st3 );
958 static
959 void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
960 Int goff_sp)
962 if (clo_mode != Heap) {
963 return;
966 IRType tyAddr = Ity_INVALID;
967 const HChar* hName= NULL;
968 void* hAddr = NULL;
969 IRExpr** argv = NULL;
970 IRDirty* di = NULL;
972 const Int THRESH = 4096 * 4; // somewhat arbitrary
973 const Int rz_szB = VG_STACK_REDZONE_SZB;
975 tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
976 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
978 if (isWrite) {
979 hName = "dh_handle_write";
980 hAddr = &dh_handle_write;
981 } else {
982 hName = "dh_handle_read";
983 hAddr = &dh_handle_read;
986 argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );
988 /* Add the helper. */
989 tl_assert(hName);
990 tl_assert(hAddr);
991 tl_assert(argv);
992 di = unsafeIRDirty_0_N( 2/*regparms*/,
993 hName, VG_(fnptr_to_fnentry)( hAddr ),
994 argv );
996 /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
997 some arbitrary N. If that fails then addr is in the range (SP -
998 RZ .. SP + N - RZ). If N is smallish (a page?) then we can say
999 addr is within a page of SP and so can't possibly be a heap
1000 access, and so can be skipped. */
1001 IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
1002 addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));
1004 IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
1005 addStmtToIRSB(
1006 sbOut,
1007 assign(sp_minus_rz,
1008 tyAddr == Ity_I32
1009 ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
1010 : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
1013 IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
1014 addStmtToIRSB(
1015 sbOut,
1016 assign(diff,
1017 tyAddr == Ity_I32
1018 ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
1019 : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
1022 IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
1023 addStmtToIRSB(
1024 sbOut,
1025 assign(guard,
1026 tyAddr == Ity_I32
1027 ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
1028 : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
1030 di->guard = mkexpr(guard);
1032 addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
1035 static
1036 IRSB* dh_instrument ( VgCallbackClosure* closure,
1037 IRSB* sbIn,
1038 const VexGuestLayout* layout,
1039 const VexGuestExtents* vge,
1040 const VexArchInfo* archinfo_host,
1041 IRType gWordTy, IRType hWordTy )
1043 Int i, n = 0;
1044 IRSB* sbOut;
1045 IRTypeEnv* tyenv = sbIn->tyenv;
1047 const Int goff_sp = layout->offset_SP;
1049 // We increment the instruction count in two places:
1050 // - just before any Ist_Exit statements;
1051 // - just before the IRSB's end.
1052 // In the former case, we zero 'n' and then continue instrumenting.
1054 sbOut = deepCopyIRSBExceptStmts(sbIn);
1056 // Copy verbatim any IR preamble preceding the first IMark
1057 i = 0;
1058 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
1059 addStmtToIRSB( sbOut, sbIn->stmts[i] );
1060 i++;
1063 for (/*use current i*/; i < sbIn->stmts_used; i++) {
1064 IRStmt* st = sbIn->stmts[i];
1066 if (!st || st->tag == Ist_NoOp) continue;
1068 switch (st->tag) {
1070 case Ist_IMark: {
1071 n++;
1072 break;
1075 case Ist_Exit: {
1076 if (n > 0) {
1077 // Add an increment before the Exit statement, then reset 'n'.
1078 add_counter_update(sbOut, n);
1079 n = 0;
1081 break;
1084 case Ist_WrTmp: {
1085 IRExpr* data = st->Ist.WrTmp.data;
1086 if (data->tag == Iex_Load) {
1087 IRExpr* aexpr = data->Iex.Load.addr;
1088 // Note also, endianness info is ignored. I guess
1089 // that's not interesting.
1090 addMemEvent( sbOut, False/*!isWrite*/,
1091 sizeofIRType(data->Iex.Load.ty),
1092 aexpr, goff_sp );
1094 break;
1097 case Ist_Store: {
1098 IRExpr* data = st->Ist.Store.data;
1099 IRExpr* aexpr = st->Ist.Store.addr;
1100 addMemEvent( sbOut, True/*isWrite*/,
1101 sizeofIRType(typeOfIRExpr(tyenv, data)),
1102 aexpr, goff_sp );
1103 break;
1106 case Ist_Dirty: {
1107 Int dataSize;
1108 IRDirty* d = st->Ist.Dirty.details;
1109 if (d->mFx != Ifx_None) {
1110 /* This dirty helper accesses memory. Collect the details. */
1111 tl_assert(d->mAddr != NULL);
1112 tl_assert(d->mSize != 0);
1113 dataSize = d->mSize;
1114 // Large (eg. 28B, 108B, 512B on x86) data-sized
1115 // instructions will be done inaccurately, but they're
1116 // very rare and this avoids errors from hitting more
1117 // than two cache lines in the simulation.
1118 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1119 addMemEvent( sbOut, False/*!isWrite*/,
1120 dataSize, d->mAddr, goff_sp );
1121 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1122 addMemEvent( sbOut, True/*isWrite*/,
1123 dataSize, d->mAddr, goff_sp );
1124 } else {
1125 tl_assert(d->mAddr == NULL);
1126 tl_assert(d->mSize == 0);
1128 break;
1131 case Ist_CAS: {
1132 /* We treat it as a read and a write of the location. I
1133 think that is the same behaviour as it was before IRCAS
1134 was introduced, since prior to that point, the Vex
1135 front ends would translate a lock-prefixed instruction
1136 into a (normal) read followed by a (normal) write. */
1137 Int dataSize;
1138 IRCAS* cas = st->Ist.CAS.details;
1139 tl_assert(cas->addr != NULL);
1140 tl_assert(cas->dataLo != NULL);
1141 dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
1142 if (cas->dataHi != NULL)
1143 dataSize *= 2; /* since it's a doubleword-CAS */
1144 addMemEvent( sbOut, False/*!isWrite*/,
1145 dataSize, cas->addr, goff_sp );
1146 addMemEvent( sbOut, True/*isWrite*/,
1147 dataSize, cas->addr, goff_sp );
1148 break;
1151 case Ist_LLSC: {
1152 IRType dataTy;
1153 if (st->Ist.LLSC.storedata == NULL) {
1154 /* LL */
1155 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
1156 addMemEvent( sbOut, False/*!isWrite*/,
1157 sizeofIRType(dataTy),
1158 st->Ist.LLSC.addr, goff_sp );
1159 } else {
1160 /* SC */
1161 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
1162 addMemEvent( sbOut, True/*isWrite*/,
1163 sizeofIRType(dataTy),
1164 st->Ist.LLSC.addr, goff_sp );
1166 break;
1169 default:
1170 break;
1173 addStmtToIRSB( sbOut, st );
1176 if (n > 0) {
1177 // Add an increment before the SB end.
1178 add_counter_update(sbOut, n);
1180 return sbOut;
1183 #undef binop
1184 #undef mkexpr
1185 #undef mkU32
1186 #undef mkU64
1187 #undef assign
1189 //------------------------------------------------------------//
1190 //--- Client requests ---//
1191 //------------------------------------------------------------//
1193 static Bool dh_handle_client_request(ThreadId tid, UWord* arg, UWord* ret)
1195 switch (arg[0]) {
1196 case VG_USERREQ__DHAT_AD_HOC_EVENT: {
1197 if (clo_mode != AdHoc) {
1198 return False;
1201 SizeT len = (SizeT)arg[1];
1203 // Only the ec and req_szB fields are used by intro_Block().
1204 Block bk;
1205 VG_(memset)(&bk, 0, sizeof(bk));
1206 bk.req_szB = len;
1207 bk.ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
1209 intro_Block(&bk);
1211 return True;
1214 case _VG_USERREQ__DHAT_COPY: {
1215 SizeT len = (SizeT)arg[1];
1217 if (clo_mode != Copy) {
1218 return False;
1221 // Only the ec and req_szB fields are used by intro_Block().
1222 Block bk;
1223 VG_(memset)(&bk, 0, sizeof(bk));
1224 bk.req_szB = len;
1225 bk.ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
1227 intro_Block(&bk);
1229 return True;
1232 default:
1233 VG_(message)(
1234 Vg_UserMsg,
1235 "Warning: unknown DHAT client request code %llx\n",
1236 (ULong)arg[0]
1238 return False;
1242 //------------------------------------------------------------//
1243 //--- Finalisation ---//
1244 //------------------------------------------------------------//
1246 // File format notes.
1248 // - The files are JSON, because it's a widely-used format and saves us having
1249 // to write a parser in dh_view.js.
1251 // - We use a comma-first style for the generated JSON. Comma-first style
1252 // moves the special case for arrays/objects from the last item to the
1253 // first. This helps in cases where you can't easily tell in advance the
1254 // size of arrays/objects, such as iterating over a WordFM (because
1255 // VG_(sizeFM) is O(n) rather than O(1)), and iterating over stack frames
1256 // using VG_(apply_ExeContext) in combination with an InlIpCursor.
1258 // - We use short field names and minimal whitespace to minimize file sizes.
1260 // Sample output:
1262 // {
1263 // // Version number of the format. Incremented on each
1264 // // backwards-incompatible change. A mandatory integer.
1265 // "dhatFileVersion": 2,
1267 // // The invocation mode. A mandatory, free-form string.
1268 // "mode": "heap",
1270 // // The verb used before above stack frames, i.e. "<verb> at {". A
1271 // // mandatory string.
1272 // "verb": "Allocated",
1274 // // Are block lifetimes recorded? Affects whether some other fields are
1275 // // present. A mandatory boolean.
1276 // "bklt": true,
1278 // // Are block accesses recorded? Affects whether some other fields are
1279 // // present. A mandatory boolean.
1280 // "bkacc": true,
1282 // // Byte/bytes/blocks-position units. Optional strings. "byte", "bytes",
1283 // // and "blocks" are the values used if these fields are omitted.
1284 // "bu": "byte", "bsu": "bytes", "bksu": "blocks",
1286 // // Time units (individual and 1,000,000x). Mandatory strings.
1287 // "tu": "instrs", "Mtu": "Minstr"
1289 // // The "short-lived" time threshold, measured in "tu"s.
1290 // // - bklt=true: a mandatory integer.
1291 // // - bklt=false: omitted.
1292 // "tuth": 500,
1294 // // The executed command. A mandatory string.
1295 // "cmd": "date",
1297 // // The process ID. A mandatory integer.
1298 // "pid": 61129
1300 // // The time at the end of execution (t-end). A mandatory integer.
1301 // "te": 350682
1303 // // The time of the global max (t-gmax).
1304 // // - bklt=true: a mandatory integer.
1305 // // - bklt=false: omitted.
1306 // "tg": 331312,
1308 // // The program points. A mandatory array.
1309 // "pps": [
1310 // {
1311 // // Total bytes and blocks. Mandatory integers.
1312 // "tb": 5, "tbk": 1,
1314 // // Total lifetimes of all blocks allocated at this PP.
1315 // // - bklt=true: a mandatory integer.
1316 // // - bklt=false: omitted.
1317 // "tl": 274,
1319 // // The maximum bytes and blocks for this PP.
1320 // // - bklt=true: mandatory integers.
1321 // // - bklt=false: omitted.
1322 // "mb": 5, "mbk": 1,
1324 // // The bytes and blocks at t-gmax for this PP.
1325 // // - bklt=true: mandatory integers.
1326 // // - bklt=false: omitted.
1327 // "gb": 0, "gbk": 0,
1329 // // The bytes and blocks at t-end for this PP.
1330 // // - bklt=true: mandatory integers.
1331 // // - bklt=false: omitted.
1332 // "eb": 0, "ebk": 0,
1334 // // The reads and writes of blocks for this PP.
1335 // // - bkacc=true: mandatory integers.
1336 // // - bkacc=false: omitted.
1337 // "rb": 41, "wb": 5,
1339 // // The exact accesses of blocks for this PP. Only used when all
1340 // // allocations are the same size and sufficiently small. A negative
1341 // // element indicates run-length encoding of the following integer.
1342 // // E.g. `-3, 4` means "three 4s in a row".
1343 // // - bkacc=true: an optional array of integers.
1344 // // - bkacc=false: omitted.
1345 // "acc": [5, -3, 4, 2],
1347 // // Frames. Each element is an index into the "ftbl" array below.
1348 // // - All modes: A mandatory array of integers.
1349 // "fs": [1, 2, 3]
1350 // }
1351 // ],
1353 // // Frame table. A mandatory array of strings.
1354 // "ftbl": [
1355 // "[root]",
1356 // "0x4AA1D9F: _nl_normalize_codeset (l10nflist.c:332)",
1357 // "0x4A9B414: _nl_load_locale_from_archive (loadarchive.c:173)",
1358 // "0x4A9A2BE: _nl_find_locale (findlocale.c:153)"
1359 // ]
1360 // }
1362 static VgFile* fp;
1364 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
1366 // The frame table holds unique frames.
1367 static WordFM* frame_tbl = NULL;
1368 static UWord next_frame_n = 0;
1370 static Word frame_cmp(UWord a, UWord b)
1372 return VG_(strcmp)((const HChar*)a, (const HChar*)b);
1375 static HChar hex_digit_to_ascii_char(UChar d)
1377 d = d & 0xf;
1378 return (d < 10) ? ('0' + d) : ('a' + (d - 10));
1381 // For JSON, we must escape double quote, backslash, and 0x00..0x1f.
1383 // Returns the original string if no escaping was required. Returns a pointer
1384 // to a static buffer if escaping was required. Therefore, the return value is
1385 // only valid until the next call to this function.
1386 static const HChar* json_escape(const HChar* s)
1388 static HChar* buf = NULL;
1389 static SizeT bufcap = 0;
1391 // Do we need any escaping?
1392 SizeT extra = 0;
1393 const HChar* p = s;
1394 while (*p) {
1395 UChar c = *p;
1396 if (c == '"' || c == '\\') {
1397 extra += 1;
1398 } else if (c <= 0x1f) {
1399 extra += 5;
1401 p++;
1403 SizeT len = p - s;
1405 if (extra == 0) {
1406 // No escaping needed.
1407 return s;
1410 // Escaping needed. (The +1 is for the NUL terminator.) Enlarge buf if
1411 // necessary.
1412 SizeT newcap = len + extra + 1;
1413 if (bufcap < newcap) {
1414 buf = VG_(realloc)("dh.json", buf, newcap);
1415 bufcap = newcap;
1418 p = s;
1419 HChar* q = buf;
1420 while (*p) {
1421 UChar c = *p;
1422 if (c == '"') {
1423 *q++ = '\\';
1424 *q++ = '"';
1425 } else if (c == '\\') {
1426 *q++ = '\\';
1427 *q++ = '\\';
1428 } else if (c <= 0x1f) {
1429 *q++ = '\\';
1430 *q++ = 'u';
1431 *q++ = '0';
1432 *q++ = '0';
1433 *q++ = hex_digit_to_ascii_char((c & 0x00f0) >> 4);
1434 *q++ = hex_digit_to_ascii_char(c & 0x000f);
1435 } else {
1436 *q++ = c;
1438 p++;
1440 *q = '\0';
1442 return buf;
1445 static void write_PPInfo_frame(UInt n, DiEpoch ep, Addr ip, void* opaque)
1447 Bool* is_first = (Bool*)opaque;
1448 InlIPCursor* iipc = VG_(new_IIPC)(ep, ip);
1450 do {
1451 const HChar* buf = VG_(describe_IP)(ep, ip, iipc);
1453 // Skip entries in vg_replace_malloc.c (e.g. `malloc`, `calloc`,
1454 // `realloc`, `operator new`) because they're boring and clog up the
1455 // output.
1456 if (VG_(strstr)(buf, "vg_replace_malloc.c")) {
1457 continue;
1460 // If this description has been seen before, get its number. Otherwise,
1461 // give it a new number and put it in the table.
1462 UWord keyW = 0, valW = 0;
1463 UWord frame_n = 0;
1464 Bool found = VG_(lookupFM)(frame_tbl, &keyW, &valW, (UWord)buf);
1465 if (found) {
1466 //const HChar* str = (const HChar*)keyW;
1467 //tl_assert(0 == VG_(strcmp)(buf, str));
1468 frame_n = valW;
1469 } else {
1470 // `buf` is a static buffer, we must copy it.
1471 const HChar* str = VG_(strdup)("dh.frame_tbl.3", buf);
1472 frame_n = next_frame_n++;
1473 Bool present = VG_(addToFM)(frame_tbl, (UWord)str, frame_n);
1474 tl_assert(!present);
1477 FP("%c%lu", *is_first ? '[' : ',', frame_n);
1478 *is_first = False;
1480 } while (VG_(next_IIPC)(iipc));
1482 VG_(delete_IIPC)(iipc);
1485 static void write_PPInfo(PPInfo* ppi, Bool is_first)
1487 FP(" %c{\"tb\":%llu,\"tbk\":%llu\n",
1488 is_first ? '[' : ',',
1489 ppi->total_bytes, ppi->total_blocks);
1491 if (clo_mode == Heap) {
1492 tl_assert(ppi->total_blocks >= ppi->max_blocks);
1493 tl_assert(ppi->total_bytes >= ppi->max_bytes);
1495 FP(" ,\"tl\":%llu\n",
1496 ppi->total_lifetimes_instrs);
1497 FP(" ,\"mb\":%llu,\"mbk\":%llu\n",
1498 ppi->max_bytes, ppi->max_blocks);
1499 FP(" ,\"gb\":%llu,\"gbk\":%llu\n",
1500 ppi->at_tgmax_bytes, ppi->at_tgmax_blocks);
1501 FP(" ,\"eb\":%llu,\"ebk\":%llu\n",
1502 ppi->curr_bytes, ppi->curr_blocks);
1503 FP(" ,\"rb\":%llu,\"wb\":%llu\n",
1504 ppi->reads_bytes, ppi->writes_bytes);
1506 if (ppi->histo && ppi->xsize_tag == Exactly) {
1507 FP(" ,\"acc\":[");
1509 // Simple run-length encoding: when N entries in a row have the same
1510 // value M, we print "-N,M". If there is just one in a row, we just
1511 // print "M". This reduces file size significantly.
1512 UShort repval = 0;
1513 Int reps = 0;
1514 for (UWord i = 0; i < ppi->xsize; i++) {
1515 UShort h = ppi->histo[i];
1516 if (repval == h) {
1517 // Continue current run.
1518 reps++;
1519 } else {
1520 // End of run; print it.
1521 if (reps == 1) {
1522 FP("%u,", repval);
1523 } else if (reps > 1) {
1524 FP("-%d,%u,", reps, repval);
1526 reps = 1;
1527 repval = h;
1530 // Print the final run.
1531 if (reps == 1) {
1532 FP("%u", repval);
1533 } else if (reps > 1) {
1534 FP("-%d,%u", reps, repval);
1537 FP("]\n");
1539 } else {
1540 tl_assert(ppi->curr_bytes == 0);
1541 tl_assert(ppi->curr_blocks == 0);
1542 tl_assert(ppi->max_bytes == 0);
1543 tl_assert(ppi->max_blocks == 0);
1544 tl_assert(ppi->at_tgmax_bytes == 0);
1545 tl_assert(ppi->at_tgmax_blocks == 0);
1546 tl_assert(ppi->total_lifetimes_instrs == 0);
1547 tl_assert(ppi->freed_blocks == 0);
1548 tl_assert(ppi->reads_bytes == 0);
1549 tl_assert(ppi->writes_bytes == 0);
1550 tl_assert(ppi->xsize_tag == 0);
1551 tl_assert(ppi->xsize == 0);
1552 tl_assert(ppi->histo == NULL);
1555 FP(" ,\"fs\":");
1556 Bool is_first_frame = True;
1557 VG_(apply_ExeContext)(write_PPInfo_frame, &is_first_frame, ppi->ec);
1558 FP("]\n");
1560 FP(" }\n");
1563 static void write_PPInfos(void)
1565 UWord keyW, valW;
1567 FP(",\"pps\":\n");
1569 VG_(initIterFM)(ppinfo);
1570 Bool is_first = True;
1571 while (VG_(nextIterFM)(ppinfo, &keyW, &valW)) {
1572 PPInfo* ppi = (PPInfo*)valW;
1573 tl_assert(ppi && ppi->ec == (ExeContext*)keyW);
1574 write_PPInfo(ppi, is_first);
1575 is_first = False;
1577 VG_(doneIterFM)(ppinfo);
1579 if (is_first) {
1580 // We didn't print any elements. This happens if ppinfo is empty.
1581 FP(" [\n");
1584 FP(" ]\n");
1587 static void dh_fini(Int exit_status)
1589 // This function does lots of allocations that it doesn't bother to free,
1590 // because execution is almost over anyway.
1592 UWord keyW, valW;
1594 // Total bytes might be at a possible peak.
1595 if (clo_mode == Heap) {
1596 check_for_peak();
1598 // Before printing statistics, we must harvest various stats (such as
1599 // lifetimes and accesses) for all the blocks that are still alive.
1600 VG_(initIterFM)( interval_tree );
1601 while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
1602 Block* bk = (Block*)keyW;
1603 tl_assert(valW == 0);
1604 tl_assert(bk);
1605 retire_Block(bk, False/*!because_freed*/);
1607 VG_(doneIterFM)( interval_tree );
1609 // Stats.
1610 if (VG_(clo_stats)) {
1611 VG_(dmsg)(" dhat: find_Block_containing:\n");
1612 VG_(dmsg)(" found: %'lu (%'lu cached + %'lu uncached)\n",
1613 stats__n_fBc_cached + stats__n_fBc_uncached,
1614 stats__n_fBc_cached,
1615 stats__n_fBc_uncached);
1616 VG_(dmsg)(" notfound: %'lu\n", stats__n_fBc_notfound);
1617 VG_(dmsg)("\n");
1621 // Create the frame table, and insert the special "[root]" node at index 0.
1622 frame_tbl = VG_(newFM)(VG_(malloc),
1623 "dh.frame_tbl.1",
1624 VG_(free),
1625 frame_cmp);
1626 const HChar* root = VG_(strdup)("dh.frame_tbl.2", "[root]");
1627 Bool present = VG_(addToFM)(frame_tbl, (UWord)root, 0);
1628 tl_assert(!present);
1629 next_frame_n = 1;
1631 // Setup output filename. Nb: it's important to do this now, i.e. as late
1632 // as possible. If we do it at start-up and the program forks and the
1633 // output file format string contains a %p (pid) specifier, both the parent
1634 // and child will incorrectly write to the same file; this happened in
1635 // 3.3.0.
1636 HChar* dhat_out_file =
1637 VG_(expand_file_name)("--dhat-out-file", clo_dhat_out_file);
1639 fp = VG_(fopen)(dhat_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1640 VKI_S_IRUSR|VKI_S_IWUSR);
1641 if (!fp) {
1642 VG_(umsg)("error: can't open DHAT output file '%s'\n", dhat_out_file);
1643 VG_(free)(dhat_out_file);
1644 return;
1647 // Write to data file.
1648 FP("{\"dhatFileVersion\":2\n");
1650 // The output mode, block booleans, and byte/block units.
1651 if (clo_mode == Heap) {
1652 FP(",\"mode\":\"heap\",\"verb\":\"Allocated\"\n");
1653 FP(",\"bklt\":true,\"bkacc\":true\n");
1654 } else if (clo_mode == Copy) {
1655 FP(",\"mode\":\"copy\",\"verb\":\"Copied\"\n");
1656 FP(",\"bklt\":false,\"bkacc\":false\n");
1657 } else if (clo_mode == AdHoc) {
1658 FP(",\"mode\":\"ad-hoc\",\"verb\":\"Occurred\"\n");
1659 FP(",\"bklt\":false,\"bkacc\":false\n");
1660 FP(",\"bu\":\"unit\",\"bsu\":\"units\",\"bksu\":\"events\"\n");
1661 } else {
1662 tl_assert(False);
1665 // The time units.
1666 FP(",\"tu\":\"instrs\",\"Mtu\":\"Minstr\"\n");
1667 if (clo_mode == Heap) {
1668 FP(",\"tuth\":500\n");
1671 // The command.
1672 const HChar* exe = VG_(args_the_exename);
1673 FP(",\"cmd\":\"%s", json_escape(exe));
1674 for (Word i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
1675 const HChar* arg = *(HChar**)VG_(indexXA)(VG_(args_for_client), i);
1676 FP(" %s", json_escape(arg));
1678 FP("\"\n");
1680 // The PID.
1681 FP(",\"pid\":%d\n", VG_(getpid)());
1683 // Times.
1684 FP(",\"te\":%llu\n", g_curr_instrs);
1685 if (clo_mode == Heap) {
1686 FP(",\"tg\":%llu\n", g_tgmax_instrs);
1687 } else {
1688 tl_assert(g_tgmax_instrs == 0);
1691 // APs.
1692 write_PPInfos();
1694 // Frame table.
1695 FP(",\"ftbl\":\n");
1697 // The frame table maps strings to numbers. We want to print it ordered by
1698 // numbers. So we create an array and fill it in from the frame table, then
1699 // print that.
1700 UWord n_frames = next_frame_n;
1701 const HChar** frames =
1702 VG_(malloc)("dh.frames", n_frames * sizeof(const HChar*));
1703 VG_(initIterFM)(frame_tbl);
1704 while (VG_(nextIterFM)(frame_tbl, &keyW, &valW)) {
1705 const HChar* str = (const HChar*)keyW;
1706 UWord n = valW;
1707 frames[n] = str;
1709 VG_(doneIterFM)(frame_tbl);
1711 for (UWord i = 0; i < n_frames; i++) {
1712 FP(" %c\"%s\"\n", i == 0 ? '[' : ',', json_escape(frames[i]));
1714 FP(" ]\n");
1715 VG_(free)(frames);
1717 FP("}\n");
1719 VG_(fclose)(fp);
1720 fp = NULL;
1722 if (VG_(clo_verbosity) == 0) {
1723 return;
1726 // Print brief global stats.
1727 VG_(umsg)("Total: %'llu %s in %'llu %s\n",
1728 g_total_bytes, clo_mode == AdHoc ? "units" : "bytes",
1729 g_total_blocks, clo_mode == AdHoc ? "events" : "blocks");
1730 if (clo_mode == Heap) {
1731 VG_(umsg)("At t-gmax: %'llu bytes in %'llu blocks\n",
1732 g_max_bytes, g_max_blocks);
1733 VG_(umsg)("At t-end: %'llu bytes in %'llu blocks\n",
1734 g_curr_bytes, g_curr_blocks);
1735 VG_(umsg)("Reads: %'llu bytes\n", g_reads_bytes);
1736 VG_(umsg)("Writes: %'llu bytes\n", g_writes_bytes);
1737 } else {
1738 tl_assert(g_max_bytes == 0);
1739 tl_assert(g_max_blocks == 0);
1740 tl_assert(g_curr_bytes == 0);
1741 tl_assert(g_curr_blocks == 0);
1742 tl_assert(g_reads_bytes == 0);
1743 tl_assert(g_writes_bytes == 0);
1746 // Print a how-to-view-the-profile hint.
1747 VG_(umsg)("\n");
1748 VG_(umsg)("To view the resulting profile, open\n");
1749 VG_(umsg)(" file://%s/%s\n", DHAT_VIEW_DIR, "dh_view.html");
1750 VG_(umsg)("in a web browser, click on \"Load...\", "
1751 "and then select the file\n");
1752 VG_(umsg)(" %s\n", dhat_out_file);
1753 VG_(umsg)("The text at the bottom explains the abbreviations used in the "
1754 "output.\n");
1756 VG_(free)(dhat_out_file);
1759 //------------------------------------------------------------//
1760 //--- Initialisation ---//
1761 //------------------------------------------------------------//
1763 static void dh_post_clo_init(void)
1765 if (clo_mode == Heap) {
1766 VG_(track_pre_mem_read) ( dh_handle_noninsn_read );
1767 VG_(track_pre_mem_read_asciiz) ( dh_handle_noninsn_read_asciiz );
1768 VG_(track_post_mem_write) ( dh_handle_noninsn_write );
1772 static void dh_pre_clo_init(void)
1774 VG_(details_name) ("DHAT");
1775 VG_(details_version) (NULL);
1776 VG_(details_description) ("a dynamic heap analysis tool");
1777 VG_(details_copyright_author)(
1778 "Copyright (C) 2010-2018, and GNU GPL'd, by Mozilla Foundation");
1779 VG_(details_bug_reports_to) (VG_BUGS_TO);
1781 // Basic functions.
1782 VG_(basic_tool_funcs) (dh_post_clo_init,
1783 dh_instrument,
1784 dh_fini);
1786 // Needs.
1787 VG_(needs_libc_freeres)();
1788 VG_(needs_cxx_freeres)();
1789 VG_(needs_command_line_options)(dh_process_cmd_line_option,
1790 dh_print_usage,
1791 dh_print_debug_usage);
1792 VG_(needs_client_requests) (dh_handle_client_request);
1793 // VG_(needs_sanity_checks) (dh_cheap_sanity_check,
1794 // dh_expensive_sanity_check);
1795 VG_(needs_malloc_replacement)(dh_malloc,
1796 dh___builtin_new,
1797 dh___builtin_new_aligned,
1798 dh___builtin_vec_new,
1799 dh___builtin_vec_new_aligned,
1800 dh_memalign,
1801 dh_calloc,
1802 dh_free,
1803 dh___builtin_delete,
1804 dh___builtin_delete_aligned,
1805 dh___builtin_vec_delete,
1806 dh___builtin_vec_delete_aligned,
1807 dh_realloc,
1808 dh_malloc_usable_size,
1809 0 );
1811 tl_assert(!interval_tree);
1812 tl_assert(!fbc_cache0);
1813 tl_assert(!fbc_cache1);
1815 interval_tree = VG_(newFM)( VG_(malloc),
1816 "dh.interval_tree.1",
1817 VG_(free),
1818 interval_tree_Cmp );
1820 ppinfo = VG_(newFM)( VG_(malloc),
1821 "dh.ppinfo.1",
1822 VG_(free),
1823 NULL/*unboxedcmp*/ );
1826 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
1828 //--------------------------------------------------------------------//
1829 //--- end dh_main.c ---//
1830 //--------------------------------------------------------------------//