dhat: remove initial count value from access count histogram user requests
[valgrind.git] / dhat / dh_main.c
blob24d1c2768b3de3e1fb3e00861b42c215eff7dd8b
2 //--------------------------------------------------------------------//
3 //--- DHAT: a Dynamic Heap Analysis Tool dh_main.c ---//
4 //--------------------------------------------------------------------//
6 /*
7 This file is part of DHAT, a Valgrind tool for profiling the
8 heap usage of programs.
10 Copyright (C) 2010-2018 Mozilla Foundation
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 The GNU General Public License is contained in the file COPYING.
28 /* Contributed by Julian Seward <jseward@acm.org> */
30 #include "pub_tool_basics.h"
31 #include "pub_tool_clientstate.h"
32 #include "pub_tool_clreq.h"
33 #include "pub_tool_libcbase.h"
34 #include "pub_tool_libcassert.h"
35 #include "pub_tool_libcfile.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_libcproc.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_mallocfree.h"
40 #include "pub_tool_options.h"
41 #include "pub_tool_replacemalloc.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
45 #include "dhat.h"
47 #define HISTOGRAM_SIZE_LIMIT 1024
48 #define USER_HISTOGRAM_SIZE_LIMIT 25*HISTOGRAM_SIZE_LIMIT
50 //------------------------------------------------------------//
51 //--- Globals ---//
52 //------------------------------------------------------------//
// Values for the entire run.
static ULong g_total_blocks = 0;
static ULong g_total_bytes  = 0;

// Current values. g_curr_blocks and g_curr_bytes are only used with
// clo_mode=Heap.
static ULong g_curr_blocks = 0;
static ULong g_curr_bytes  = 0;
static ULong g_curr_instrs = 0;  // incremented from generated code

// Values at the global max, i.e. when g_curr_bytes peaks.
// Only used with clo_mode=Heap.
static ULong g_max_blocks = 0;
static ULong g_max_bytes  = 0;

// Time of the global max.
static ULong g_tgmax_instrs = 0;

// Values for the entire run. Updated each time a block is retired.
// Only used with clo_mode=Heap.
static ULong g_reads_bytes  = 0;
static ULong g_writes_bytes = 0;

//------------------------------------------------------------//
//--- Command line args                                    ---//
//------------------------------------------------------------//

// Heap=55: deliberately starts away from zero — presumably so that an
// uninitialised/garbage mode value is unlikely to match; TODO confirm.
typedef enum { Heap=55, Copy, AdHoc } ProfileKind;

// Profiling mode selected with --mode=...; defaults to heap profiling.
static ProfileKind clo_mode = Heap;

// Output file name pattern; %p expands to the client's PID.
static const HChar* clo_dhat_out_file = "dhat.out.%p";
87 static Bool dh_process_cmd_line_option(const HChar* arg)
89 if VG_STR_CLO(arg, "--dhat-out-file", clo_dhat_out_file) {
91 } else if (VG_XACT_CLO(arg, "--mode=heap", clo_mode, Heap)) {
92 } else if (VG_XACT_CLO(arg, "--mode=copy", clo_mode, Copy)) {
93 } else if (VG_XACT_CLO(arg, "--mode=ad-hoc", clo_mode, AdHoc)) {
95 } else {
96 return VG_(replacement_malloc_process_cmd_line_option)(arg);
99 return True;
// Print the tool's user-visible options for --help.
static void dh_print_usage(void)
{
   VG_(printf)(
" --dhat-out-file=<file> output file name [dhat.out.%%p]\n"
" --mode=heap|copy|ad-hoc profiling mode\n"
   );
}
// Print the tool's debug-only options for --help-debug (DHAT has none).
static void dh_print_debug_usage(void)
{
   VG_(printf)(
" (none)\n"
   );
}
117 //------------------------------------------------------------//
118 //--- an Interval Tree of live blocks ---//
119 //------------------------------------------------------------//
121 /* Tracks information about live blocks. */
/* Tracks information about live blocks. */
typedef
   struct {
      Addr        payload;       /* start address of the client block */
      SizeT       req_szB;       /* client-requested size, in bytes */
      ExeContext* ec;            /* allocation ec */
      ULong       allocd_at;     /* instruction number */
      ULong       reads_bytes;   /* bytes read from this block so far */
      ULong       writes_bytes;  /* bytes written to this block so far */
      /* Approx histogram, one 16-bit counter per payload byte.  Counts
         saturate (latch) at 0xFFFF.  Can be NULL if the block is resized
         or if the block is larger than HISTOGRAM_SIZE_LIMIT. */
      UShort*     histoW;        /* [0 .. req_szB-1] */
   }
   Block;

/* May not contain zero-sized blocks.  May not contain
   overlapping blocks. */
static WordFM* interval_tree = NULL;  /* WordFM* Block* void */
141 /* Here's the comparison function. Since the tree is required
142 to contain non-zero sized, non-overlapping blocks, it's good
143 enough to consider any overlap as a match. */
144 static Word interval_tree_Cmp ( UWord k1, UWord k2 )
146 Block* b1 = (Block*)k1;
147 Block* b2 = (Block*)k2;
148 tl_assert(b1->req_szB > 0);
149 tl_assert(b2->req_szB > 0);
150 if (b1->payload + b1->req_szB <= b2->payload) return -1;
151 if (b2->payload + b2->req_szB <= b1->payload) return 1;
152 return 0;
// 3-entry move-to-front cache for find_Block_containing, consulted
// before falling back to the interval tree.  Must be invalidated (set
// to NULL) whenever the tree's contents change.
static Block* fbc_cache0 = NULL;
static Block* fbc_cache1 = NULL;
static Block* fbc_cache2 = NULL;

// Hit/miss counters for the cache above (diagnostics only).
static UWord stats__n_fBc_cached0  = 0;
static UWord stats__n_fBc_cached1  = 0;
static UWord stats__n_fBc_cached2  = 0;
static UWord stats__n_fBc_uncached = 0;
static UWord stats__n_fBc_notfound = 0;
/* Return the live Block whose payload range contains address 'a', or
   NULL if none does.  A 3-entry move-to-front cache is tried first;
   cache misses fall back to an interval-tree lookup using a fake
   1-byte block, exploiting interval_tree_Cmp's overlap-as-equality. */
static Block* find_Block_containing ( Addr a )
{
   tl_assert(clo_mode == Heap);

   if (LIKELY(fbc_cache0
              && fbc_cache0->payload <= a
              && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
      // found at 0
      stats__n_fBc_cached0++;
      return fbc_cache0;
   }
   if (LIKELY(fbc_cache1
              && fbc_cache1->payload <= a
              && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
      // found at 1; swap 0 and 1
      Block* tmp = fbc_cache1;
      fbc_cache1 = fbc_cache0;
      fbc_cache0 = tmp;
      stats__n_fBc_cached1++;
      return tmp;
   }
   if (LIKELY(fbc_cache2
              && fbc_cache2->payload <= a
              && a < fbc_cache2->payload + fbc_cache2->req_szB)) {
      // found at 2; swap 1 and 2
      Block* tmp = fbc_cache2;
      fbc_cache2 = fbc_cache1;
      fbc_cache1 = tmp;
      stats__n_fBc_cached2++;
      return tmp;
   }

   // Cache miss: probe the tree with a minimal fake block at 'a'.
   Block fake;
   fake.payload = a;
   fake.req_szB = 1;
   UWord foundkey = 1;
   UWord foundval = 1;
   Bool found = VG_(lookupFM)( interval_tree,
                               &foundkey, &foundval, (UWord)&fake );
   if (!found) {
      stats__n_fBc_notfound++;
      return NULL;
   }
   tl_assert(foundval == 0); // we don't store vals in the interval tree
   tl_assert(foundkey != 1); // must have been overwritten by the lookup
   Block* res = (Block*)foundkey;
   tl_assert(res != &fake);
   // put at the top position
   fbc_cache2 = fbc_cache1;
   fbc_cache1 = fbc_cache0;
   fbc_cache0 = res;
   stats__n_fBc_uncached++;
   return res;
}
221 // delete a block; asserts if not found. (viz, 'a' must be
222 // known to be present.)
223 static void delete_Block_starting_at ( Addr a )
225 tl_assert(clo_mode == Heap);
227 Block fake;
228 fake.payload = a;
229 fake.req_szB = 1;
230 Bool found = VG_(delFromFM)( interval_tree,
231 NULL, NULL, (Addr)&fake );
232 tl_assert(found);
233 fbc_cache0 = fbc_cache1 = fbc_cache2 = NULL;
236 //------------------------------------------------------------//
237 //--- a FM of allocation points (APs) ---//
238 //------------------------------------------------------------//
typedef
   struct {
      // The program point that we're summarising stats for.
      ExeContext* ec;

      // Total number of blocks and bytes allocated by this PP.
      ULong total_blocks;
      ULong total_bytes;

      // The current number of blocks and bytes live for this PP.
      // Only used with clo_mode=Heap.
      ULong curr_blocks;
      ULong curr_bytes;

      // Values at the PP max, i.e. when this PP's curr_bytes peaks.
      // Only used with clo_mode=Heap.
      ULong max_blocks;   // Blocks at the PP max.
      ULong max_bytes;    // The PP max, measured in bytes.

      // Values at the global max.
      // Only used with clo_mode=Heap.
      ULong at_tgmax_blocks;
      ULong at_tgmax_bytes;

      // Total lifetimes of all blocks allocated by this PP.  Includes blocks
      // explicitly freed and blocks implicitly freed at termination.
      // Only used with clo_mode=Heap.
      ULong total_lifetimes_instrs;

      // Number of blocks freed by this PP.  (Only used in assertions.)
      // Only used with clo_mode=Heap.
      ULong freed_blocks;

      // Total number of reads and writes in all blocks allocated
      // by this PP.  Only used with clo_mode=Heap.
      ULong reads_bytes;
      ULong writes_bytes;

      /* Histogram information.  We maintain a histogram aggregated for
         all retiring Blocks allocated by this PP, but only if:
         - this PP has only ever allocated objects of one size
         - that size is <= HISTOGRAM_SIZE_LIMIT
         What we need therefore is a mechanism to see if this PP
         has only ever allocated blocks of one size.

         3 states:
            Unknown   because no retirement yet
            Exactly   xsize  all retiring blocks are of this size
            Mixed     multiple different sizes seen

         Only used with clo_mode=Heap.
      */
      // Unknown=999: starts away from zero, presumably to make stray
      // zero-initialised tags detectable — TODO confirm.
      enum { Unknown=999, Exactly, Mixed } xsize_tag;
      SizeT xsize;
      UInt* histo;   /* [0 .. xsize-1] */
   }
   PPInfo;

/* maps ExeContext*'s to PPInfo*'s.  Note that the keys must match the
   .ec field in the values. */
static WordFM* ppinfo = NULL;   /* WordFM* ExeContext* PPInfo* */
302 // Are we at peak memory? If so, update at_tgmax_blocks and at_tgmax_bytes in
303 // all PPInfos. Note that this is moderately expensive so we avoid calling it
304 // on every allocation.
305 static void check_for_peak(void)
307 tl_assert(clo_mode == Heap);
309 if (g_curr_bytes == g_max_bytes) {
310 // It's a peak. (If there are multiple equal peaks we record the latest
311 // one.)
312 UWord keyW, valW;
313 VG_(initIterFM)(ppinfo);
314 while (VG_(nextIterFM)(ppinfo, &keyW, &valW)) {
315 PPInfo* ppi = (PPInfo*)valW;
316 tl_assert(ppi && ppi->ec == (ExeContext*)keyW);
317 ppi->at_tgmax_blocks = ppi->curr_blocks;
318 ppi->at_tgmax_bytes = ppi->curr_bytes;
320 VG_(doneIterFM)(ppinfo);
324 /* 'bk' is being introduced (has just been allocated). Find the
325 relevant PPInfo entry for it, or create one, based on the block's
326 allocation EC. Then, update the PPInfo to the extent that we
327 actually can, to reflect the allocation. */
/* 'bk' is being introduced (has just been allocated).  Find the
   relevant PPInfo entry for it, or create one, based on the block's
   allocation EC.  Then, update the PPInfo to the extent that we
   actually can, to reflect the allocation. */
static void intro_Block(Block* bk)
{
   tl_assert(bk);
   tl_assert(bk->ec);

   // Look up (or lazily create) the PPInfo keyed by the allocation EC.
   PPInfo* ppi = NULL;
   UWord keyW = 0;
   UWord valW = 0;
   Bool found = VG_(lookupFM)( ppinfo,
                               &keyW, &valW, (UWord)bk->ec );
   if (found) {
      ppi = (PPInfo*)valW;
      tl_assert(keyW == (UWord)bk->ec);
   } else {
      ppi = VG_(malloc)( "dh.intro_Block.1", sizeof(PPInfo) );
      VG_(memset)(ppi, 0, sizeof(*ppi));
      ppi->ec = bk->ec;
      Bool present = VG_(addToFM)( ppinfo,
                                   (UWord)bk->ec, (UWord)ppi );
      tl_assert(!present);
      if (clo_mode == Heap) {
         // histo stuff
         tl_assert(ppi->freed_blocks == 0);
         ppi->xsize_tag = Unknown;
         ppi->xsize = 0;
         if (0) VG_(printf)("ppi %p --> Unknown\n", ppi);
      }
   }
   tl_assert(ppi->ec == bk->ec);

   // Update global stats and PPInfo stats.

   g_total_blocks++;
   g_total_bytes += bk->req_szB;

   ppi->total_blocks++;
   ppi->total_bytes += bk->req_szB;

   if (clo_mode == Heap) {
      g_curr_blocks++;
      g_curr_bytes += bk->req_szB;

      ppi->curr_blocks++;
      ppi->curr_bytes += bk->req_szB;

      // The use of `>=` rather than `>` means that if there are multiple equal
      // peaks we record the latest one, like `check_for_peak` does.
      // NOTE(review): this PP's max_* snapshot is taken only when the
      // *global* high-water mark is (re)set — confirm this matches the
      // intended "PP max" semantics documented on the struct.
      if (g_curr_bytes >= g_max_bytes) {
         g_max_blocks   = g_curr_blocks;
         g_max_bytes    = g_curr_bytes;
         g_tgmax_instrs = g_curr_instrs;

         ppi->max_blocks = ppi->curr_blocks;
         ppi->max_bytes  = ppi->curr_bytes;
      }
   }
}
387 /* 'bk' is retiring (being freed). Find the relevant PPInfo entry for
388 it, which must already exist. Then, fold info from 'bk' into that
389 entry. 'because_freed' is True if the block is retiring because
390 the client has freed it. If it is False then the block is retiring
391 because the program has finished, in which case we want to skip the
392 updates of the total blocks live etc for this PP, but still fold in
393 the access counts and histo data that have so far accumulated for
394 the block. */
/* 'bk' is retiring (being freed).  Fold its stats into its PP's entry,
   which must already exist.  'because_freed' is True for a client free
   (so live-block counters come down and a peak check is needed) and
   False at program end (live counters are left alone but access counts
   and histogram data are still folded in). */
static void retire_Block(Block* bk, Bool because_freed)
{
   tl_assert(clo_mode == Heap);
   tl_assert(bk);
   tl_assert(bk->ec);

   PPInfo* ppi = NULL;
   UWord keyW = 0;
   UWord valW = 0;
   Bool found = VG_(lookupFM)( ppinfo,
                               &keyW, &valW, (UWord)bk->ec );
   tl_assert(found);
   ppi = (PPInfo*)valW;
   tl_assert(ppi->ec == bk->ec);

   // update stats following this free.
   if (0)
      VG_(printf)("ec %p ppi->c_by_l %llu bk->rszB %llu\n",
                  bk->ec, ppi->curr_bytes, (ULong)bk->req_szB);

   if (because_freed) {
      // Total bytes is coming down from a possible peak.
      check_for_peak();

      // Then update global stats.
      tl_assert(g_curr_blocks >= 1);
      tl_assert(g_curr_bytes >= bk->req_szB);
      g_curr_blocks--;
      g_curr_bytes -= bk->req_szB;

      // Then update PPInfo stats.
      tl_assert(ppi->curr_blocks >= 1);
      tl_assert(ppi->curr_bytes >= bk->req_szB);
      ppi->curr_blocks--;
      ppi->curr_bytes -= bk->req_szB;

      ppi->freed_blocks++;
   }

   // Accumulate this block's lifetime, measured in instructions.
   tl_assert(bk->allocd_at <= g_curr_instrs);
   ppi->total_lifetimes_instrs += (g_curr_instrs - bk->allocd_at);

   // access counts
   ppi->reads_bytes  += bk->reads_bytes;
   ppi->writes_bytes += bk->writes_bytes;
   g_reads_bytes  += bk->reads_bytes;
   g_writes_bytes += bk->writes_bytes;

   // histo stuff.  First, do state transitions for xsize/xsize_tag:
   // Unknown -> Exactly on first retirement; Exactly -> Mixed (dropping
   // the aggregate histogram) if a different size shows up.
   switch (ppi->xsize_tag) {

      case Unknown:
         tl_assert(ppi->xsize == 0);
         tl_assert(ppi->freed_blocks == 1 || ppi->freed_blocks == 0);
         tl_assert(!ppi->histo);
         ppi->xsize_tag = Exactly;
         ppi->xsize = bk->req_szB;
         if (0) VG_(printf)("ppi %p --> Exactly(%lu)\n", ppi, ppi->xsize);
         // and allocate the histo
         if (bk->histoW) {
            ppi->histo = VG_(malloc)("dh.retire_Block.1",
                                     ppi->xsize * sizeof(UInt));
            VG_(memset)(ppi->histo, 0, ppi->xsize * sizeof(UInt));
         }
         break;

      case Exactly:
         //tl_assert(ppi->freed_blocks > 1);
         if (bk->req_szB != ppi->xsize) {
            if (0) VG_(printf)("ppi %p --> Mixed(%lu -> %lu)\n",
                               ppi, ppi->xsize, bk->req_szB);
            ppi->xsize_tag = Mixed;
            ppi->xsize = 0;
            // deallocate the histo, if any
            if (ppi->histo) {
               VG_(free)(ppi->histo);
               ppi->histo = NULL;
            }
         }
         break;

      case Mixed:
         //tl_assert(ppi->freed_blocks > 1);
         break;

      default:
         tl_assert(0);
   }

   // See if we can fold the histo data from this block into
   // the data for the PP.
   if (ppi->xsize_tag == Exactly && ppi->histo && bk->histoW) {
      tl_assert(ppi->xsize == bk->req_szB);
      UWord i;
      for (i = 0; i < ppi->xsize; i++) {
         // FIXME: do something better in case of overflow of ppi->histo[..]
         // Right now, at least don't let it overflow/wrap around
         if (ppi->histo[i] <= 0xFFFE0000)
            ppi->histo[i] += (UInt)bk->histoW[i];
      }
      if (0) VG_(printf)("fold in, PP = %p\n", ppi);
   }

#if 0
   if (bk->histoB) {
      VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
      UWord i;
      for (i = 0; i < bk->req_szB; i++)
         VG_(printf)("%u ", (UInt)bk->histoB[i]);
      VG_(printf)("\n");
   } else {
      VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
   }
#endif
}
511 /* This handles block resizing. When a block with PP 'ec' has a
512 size change of 'delta', call here to update the PPInfo. */
/* This handles block resizing.  When a block with PP 'ec' has a
   size change from 'old_req_szB' to 'new_req_szB', call here to update
   the PPInfo and the global counters. */
static void resize_Block(ExeContext* ec, SizeT old_req_szB, SizeT new_req_szB)
{
   tl_assert(clo_mode == Heap);

   Long delta = (Long)new_req_szB - (Long)old_req_szB;
   PPInfo* ppi = NULL;
   UWord keyW = 0;
   UWord valW = 0;
   Bool found = VG_(lookupFM)( ppinfo,
                               &keyW, &valW, (UWord)ec );

   tl_assert(found);
   ppi = (PPInfo*)valW;
   tl_assert(ppi->ec == ec);

   if (delta < 0) {
      // -delta is positive here, so the ULong comparisons are safe.
      tl_assert(ppi->curr_bytes >= -delta);
      tl_assert(g_curr_bytes >= -delta);

      // Total bytes might be coming down from a possible peak.
      check_for_peak();
   }

   // Note: we treat realloc() like malloc() + free() for total counts, i.e. we
   // increment total_blocks by 1 and increment total_bytes by new_req_szB.
   //
   // A reasonable alternative would be to leave total_blocks unchanged and
   // increment total_bytes by delta (but only if delta is positive).  But then
   // calls to realloc wouldn't be counted towards the total_blocks count,
   // which is undesirable.

   // Update global stats and PPInfo stats.

   g_total_blocks++;
   g_total_bytes += new_req_szB;

   ppi->total_blocks++;
   ppi->total_bytes += new_req_szB;

   g_curr_blocks += 0; // unchanged
   g_curr_bytes  += delta;

   ppi->curr_blocks += 0; // unchanged
   ppi->curr_bytes  += delta;

   // The use of `>=` rather than `>` means that if there are multiple equal
   // peaks we record the latest one, like `check_for_peak` does.
   if (g_curr_bytes >= g_max_bytes) {
      g_max_blocks   = g_curr_blocks;
      g_max_bytes    = g_curr_bytes;
      g_tgmax_instrs = g_curr_instrs;

      ppi->max_blocks = ppi->curr_blocks;
      ppi->max_bytes  = ppi->curr_bytes;
   }
}
570 //------------------------------------------------------------//
571 //--- update both Block and PPInfos after {m,re}alloc/free ---//
572 //------------------------------------------------------------//
574 static
575 void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
576 Bool is_zeroed )
578 tl_assert(p == NULL); // don't handle custom allocators right now
579 SizeT actual_szB;
581 if ((SSizeT)req_szB < 0) return NULL;
583 if (req_szB == 0) {
584 req_szB = 1; /* can't allow zero-sized blocks in the interval tree */
587 // Allocate and zero if necessary
588 if (!p) {
589 p = VG_(cli_malloc)( req_alignB, req_szB );
590 if (!p) {
591 return NULL;
593 if (is_zeroed) VG_(memset)(p, 0, req_szB);
594 actual_szB = VG_(cli_malloc_usable_size)(p);
595 tl_assert(actual_szB >= req_szB);
598 if (clo_mode != Heap) {
599 return p;
602 // Make new Block, add to interval_tree.
603 Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
604 bk->payload = (Addr)p;
605 bk->req_szB = req_szB;
606 bk->ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
607 bk->allocd_at = g_curr_instrs;
608 bk->reads_bytes = 0;
609 bk->writes_bytes = 0;
610 // Set up histogram array, if the block isn't too large.
611 bk->histoW = NULL;
612 if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
613 bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
614 VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
617 Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
618 tl_assert(!present);
619 fbc_cache0 = fbc_cache1 = fbc_cache2 = NULL;
621 intro_Block(bk);
623 return p;
626 static
627 void die_block ( void* p )
629 VG_(cli_free)(p);
631 if (clo_mode != Heap) {
632 return;
635 Block* bk = find_Block_containing( (Addr)p );
636 if (!bk) {
637 return; // bogus free
640 tl_assert(bk->req_szB > 0);
641 // assert the block finder is behaving sanely
642 tl_assert(bk->payload <= (Addr)p);
643 tl_assert( (Addr)p < bk->payload + bk->req_szB );
645 if (bk->payload != (Addr)p) {
646 return; // bogus free
649 retire_Block(bk, True/*because_freed*/);
651 delete_Block_starting_at( bk->payload );
652 if (bk->histoW) {
653 VG_(free)( bk->histoW );
654 bk->histoW = NULL;
656 VG_(free)( bk );
/* realloc() the client block at 'p_old' to 'new_req_szB' bytes.
   Returns the (possibly moved) new pointer, or NULL on failure or on a
   bogus realloc (unknown/interior pointer), in which case the old block
   is untouched.  In Heap mode this is counted like malloc()+free()
   (see resize_Block) and the per-byte histogram is discarded. */
static
void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
{
   void* p_new = NULL;

   tl_assert(new_req_szB > 0); // map 0 to 1

   // Copy/ad-hoc modes: plain reallocate-and-copy, no metadata.
   if (clo_mode != Heap) {
      SizeT old_actual_szB = VG_(cli_malloc_usable_size)(p_old);
      p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
      if (!p_new) {
         return NULL;
      }
      VG_(memmove)(p_new, p_old, VG_MIN(old_actual_szB, new_req_szB));
      VG_(cli_free)(p_old);
      return p_new;
   }

   // Find the old block.
   Block* bk = find_Block_containing( (Addr)p_old );
   if (!bk) {
      return NULL; // bogus realloc
   }

   tl_assert(bk->req_szB > 0);
   // Assert the block finder is behaving sanely.
   tl_assert(bk->payload <= (Addr)p_old);
   tl_assert( (Addr)p_old < bk->payload + bk->req_szB );

   if (bk->payload != (Addr)p_old) {
      return NULL; // bogus realloc
   }

   // Keeping the histogram alive in any meaningful way across
   // block resizing is too darn complicated.  Just throw it away.
   if (bk->histoW) {
      VG_(free)(bk->histoW);
      bk->histoW = NULL;
   }

   // Actually do the allocation, if necessary.
   if (new_req_szB <= bk->req_szB) {
      // New size is smaller or same; block not moved.
      resize_Block(bk->ec, bk->req_szB, new_req_szB);
      bk->req_szB = new_req_szB;

      // Update reads/writes for the implicit copy.  Even though we didn't
      // actually do a copy, we act like we did, to match up with the fact
      // that we treat this as an additional allocation.
      bk->reads_bytes  += new_req_szB;
      bk->writes_bytes += new_req_szB;

      p_new = p_old;

   } else {
      // New size is bigger; make new block, copy shared contents, free old.
      p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
      if (!p_new) {
         // Nb: if realloc fails, NULL is returned but the old block is not
         // touched.  What an awful function.
         return NULL;
      }
      tl_assert(p_new != p_old);

      VG_(memcpy)(p_new, p_old, bk->req_szB);
      VG_(cli_free)(p_old);

      // Since the block has moved, we need to re-insert it into the
      // interval tree at the new place.  Do this by removing
      // and re-adding it.
      delete_Block_starting_at( (Addr)p_old );
      // Now 'bk' is no longer in the tree, but the Block itself
      // is still alive.

      // Update reads/writes for the copy.
      bk->reads_bytes  += bk->req_szB;
      bk->writes_bytes += bk->req_szB;

      // Update the metadata.
      resize_Block(bk->ec, bk->req_szB, new_req_szB);
      bk->payload = (Addr)p_new;
      bk->req_szB = new_req_szB;

      // And re-add it to the interval tree.
      Bool present
         = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
      tl_assert(!present);
      fbc_cache0 = fbc_cache1 = fbc_cache2 = NULL;
   }

   return p_new;
}
752 //------------------------------------------------------------//
753 //--- malloc() et al replacement wrappers ---//
754 //------------------------------------------------------------//
// The malloc/new family all forward to new_block(); the free/delete
// family all forward to die_block().  The *_aligned variants honour the
// caller-supplied alignment; everything else uses VG_(clo_alignment).

static void* dh_malloc ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* dh___builtin_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* dh___builtin_new_aligned ( ThreadId tid, SizeT szB, SizeT alignB )
{
   return new_block( tid, NULL, szB, alignB, /*is_zeroed*/False );
}

static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* dh___builtin_vec_new_aligned ( ThreadId tid, SizeT szB, SizeT alignB )
{
   return new_block( tid, NULL, szB, alignB, /*is_zeroed*/False );
}

static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
{
   // NOTE(review): m*szB can wrap for huge requests; presumably the core
   // rejects such sizes before this point — confirm.
   return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
}

static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
{
   return new_block( tid, NULL, szB, alignB, False );
}

static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
{
   die_block(p);
}

static void dh___builtin_delete ( ThreadId tid, void* p )
{
   die_block(p);
}

static void dh___builtin_delete_aligned ( ThreadId tid, void* p, SizeT align )
{
   die_block(p);
}

static void dh___builtin_vec_delete ( ThreadId tid, void* p )
{
   die_block(p);
}

static void dh___builtin_vec_delete_aligned ( ThreadId tid, void* p, SizeT align )
{
   die_block(p);
}
816 static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
818 if (p_old == NULL) {
819 return dh_malloc(tid, new_szB);
821 if (new_szB == 0) {
822 if (VG_(clo_realloc_zero_bytes_frees) == True) {
823 dh_free(tid, p_old);
824 return NULL;
826 new_szB = 1;
828 return renew_block(tid, p_old, new_szB);
831 static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
833 if (clo_mode != Heap) {
834 return VG_(cli_malloc_usable_size)(p);
837 Block* bk = find_Block_containing( (Addr)p );
838 return bk ? bk->req_szB : 0;
841 //------------------------------------------------------------//
842 //--- memory references ---//
843 //------------------------------------------------------------//
845 static
846 void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
848 UWord i, offMin, offMax1;
849 offMin = addr - bk->payload;
850 tl_assert(offMin < bk->req_szB);
851 offMax1 = offMin + szB;
852 if (offMax1 > bk->req_szB)
853 offMax1 = bk->req_szB;
854 //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB);
855 for (i = offMin; i < offMax1; i++) {
856 UShort n = bk->histoW[i];
857 if (n < 0xFFFF) n++;
858 bk->histoW[i] = n;
862 static VG_REGPARM(2)
863 void dh_handle_write ( Addr addr, UWord szB )
865 tl_assert(clo_mode == Heap);
867 Block* bk = find_Block_containing(addr);
868 if (bk) {
869 bk->writes_bytes += szB;
870 if (bk->histoW)
871 inc_histo_for_block(bk, addr, szB);
875 static VG_REGPARM(2)
876 void dh_handle_read ( Addr addr, UWord szB )
878 tl_assert(clo_mode == Heap);
880 Block* bk = find_Block_containing(addr);
881 if (bk) {
882 bk->reads_bytes += szB;
883 if (bk->histoW)
884 inc_histo_for_block(bk, addr, szB);
888 // Handle reads and writes by syscalls (read == kernel
889 // reads user space, write == kernel writes user space).
890 // Assumes no such read or write spans a heap block
891 // boundary and so we can treat it just as one giant
892 // read or write.
// Record a non-instruction read of client memory on the core's behalf.
// Only syscall reads are counted; other CoreParts are deliberately
// ignored, and anything unexpected asserts.
static
void dh_handle_noninsn_read ( CorePart part, ThreadId tid, const HChar* s,
                              Addr base, SizeT size )
{
   tl_assert(clo_mode == Heap);

   switch (part) {
      case Vg_CoreSysCall:
         // Kernel read of user space: treat as one big client read.
         dh_handle_read(base, size);
         break;
      case Vg_CoreSysCallArgInMem:
         break;
      case Vg_CoreTranslate:
         break;
      default:
         tl_assert(0);
   }
}
912 static
913 void dh_handle_noninsn_read_asciiz(CorePart part, ThreadId tid, const HChar* s,
914 Addr str)
916 tl_assert(clo_mode == Heap);
918 tl_assert(part == Vg_CoreSysCall);
919 dh_handle_noninsn_read(part, tid, s, str, VG_(strlen)((const HChar*)str+1));
// Record a non-instruction write of client memory on the core's behalf.
// Syscall and client-request writes are counted; signal-frame writes
// are ignored, and anything unexpected asserts.
static
void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
                               Addr base, SizeT size )
{
   tl_assert(clo_mode == Heap);

   switch (part) {
      case Vg_CoreSysCall:
      case Vg_CoreClientReq:
         // Kernel or client-request write: treat as one big client write.
         dh_handle_write(base, size);
         break;
      case Vg_CoreSignal:
         break;
      default:
         tl_assert(0);
   }
}
940 //------------------------------------------------------------//
941 //--- Instrumentation ---//
942 //------------------------------------------------------------//
944 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
945 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
946 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
947 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
948 #define assign(_t, _e) IRStmt_WrTmp((_t), (_e))
// Append IR to 'sbOut' that adds 'n' to the global g_curr_instrs,
// using a host-endianness load/add/store sequence.  Not thread-safe,
// but Valgrind serialises guest execution so that is fine here.
static
void add_counter_update(IRSB* sbOut, Int n)
{
   #if defined(VG_BIGENDIAN)
   # define END Iend_BE
   #elif defined(VG_LITTLEENDIAN)
   # define END Iend_LE
   #else
   # error "Unknown endianness"
   #endif
   // Add code to increment 'g_curr_instrs' by 'n', like this:
   //   WrTmp(t1, Load64(&g_curr_instrs))
   //   WrTmp(t2, Add64(RdTmp(t1), Const(n)))
   //   Store(&g_curr_instrs, t2)
   IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
   IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
   IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_curr_instrs );

   IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
   IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
   IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));

   addStmtToIRSB( sbOut, st1 );
   addStmtToIRSB( sbOut, st2 );
   addStmtToIRSB( sbOut, st3 );
}
// Append IR to 'sbOut' that calls dh_handle_read/dh_handle_write for a
// 'szB'-byte access at 'addr', guarded so that accesses close to the
// stack pointer (which cannot be heap accesses) skip the call.
// 'goff_sp' is the guest-state offset of SP.  No-op outside Heap mode.
static
void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
                 Int goff_sp)
{
   if (clo_mode != Heap) {
      return;
   }

   IRType       tyAddr = Ity_INVALID;
   const HChar* hName  = NULL;
   void*        hAddr  = NULL;
   IRExpr**     argv   = NULL;
   IRDirty*     di     = NULL;

   const Int THRESH = 4096 * 4; // somewhat arbitrary
   const Int rz_szB = VG_STACK_REDZONE_SZB;

   tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

   if (isWrite) {
      hName = "dh_handle_write";
      hAddr = &dh_handle_write;
   } else {
      hName = "dh_handle_read";
      hAddr = &dh_handle_read;
   }

   argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );

   /* Add the helper. */
   tl_assert(hName);
   tl_assert(hAddr);
   tl_assert(argv);
   di = unsafeIRDirty_0_N( 2/*regparms*/,
                           hName, VG_(fnptr_to_fnentry)( hAddr ),
                           argv );

   /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
      some arbitrary N.  If that fails then addr is in the range (SP -
      RZ .. SP + N - RZ).  If N is smallish (a page?) then we can say
      addr is within a page of SP and so can't possibly be a heap
      access, and so can be skipped. */
   IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));

   IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB(
      sbOut,
      assign(sp_minus_rz,
             tyAddr == Ity_I32
                ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
                : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
   );

   IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB(
      sbOut,
      assign(diff,
             tyAddr == Ity_I32
                ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
                : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
   );

   // Unsigned compare, so addresses below SP-RZ also pass the guard.
   IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
   addStmtToIRSB(
      sbOut,
      assign(guard,
             tyAddr == Ity_I32
                ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
                : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
   );
   di->guard = mkexpr(guard);

   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
}
// dh_instrument: DHAT's translation-time instrumentation pass.  It walks the
// incoming superblock `sbIn` and builds `sbOut` containing the original
// statements plus (a) increments of the global instruction counter and
// (b) calls recording each memory read/write for heap-access profiling.
1054 static
1055 IRSB* dh_instrument ( VgCallbackClosure* closure,
1056 IRSB* sbIn,
1057 const VexGuestLayout* layout,
1058 const VexGuestExtents* vge,
1059 const VexArchInfo* archinfo_host,
1060 IRType gWordTy, IRType hWordTy )
// `n` counts IMarks (guest instructions) seen since the last counter flush.
1062 Int i, n = 0;
1063 IRSB* sbOut;
1064 IRTypeEnv* tyenv = sbIn->tyenv;
// Guest-state offset of the stack pointer, passed down to addMemEvent.
1066 const Int goff_sp = layout->offset_SP;
1068 // We increment the instruction count in two places:
1069 // - just before any Ist_Exit statements;
1070 // - just before the IRSB's end.
1071 // In the former case, we zero 'n' and then continue instrumenting.
1073 sbOut = deepCopyIRSBExceptStmts(sbIn);
1075 // Copy verbatim any IR preamble preceding the first IMark
1076 i = 0;
1077 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
1078 addStmtToIRSB( sbOut, sbIn->stmts[i] );
1079 i++;
1082 for (/*use current i*/; i < sbIn->stmts_used; i++) {
1083 IRStmt* st = sbIn->stmts[i];
1085 if (!st || st->tag == Ist_NoOp) continue;
1087 switch (st->tag) {
// One more guest instruction in the current run.
1089 case Ist_IMark: {
1090 n++;
1091 break;
1094 case Ist_Exit: {
1095 if (n > 0) {
1096 // Add an increment before the Exit statement, then reset 'n'.
1097 add_counter_update(sbOut, n);
1098 n = 0;
1100 break;
// A load appears as a WrTmp whose data expression is an Iex_Load.
1103 case Ist_WrTmp: {
1104 IRExpr* data = st->Ist.WrTmp.data;
1105 if (data->tag == Iex_Load) {
1106 IRExpr* aexpr = data->Iex.Load.addr;
1107 // Note also, endianness info is ignored. I guess
1108 // that's not interesting.
1109 addMemEvent( sbOut, False/*!isWrite*/,
1110 sizeofIRType(data->Iex.Load.ty),
1111 aexpr, goff_sp );
1113 break;
1116 case Ist_Store: {
1117 IRExpr* data = st->Ist.Store.data;
1118 IRExpr* aexpr = st->Ist.Store.addr;
1119 addMemEvent( sbOut, True/*isWrite*/,
1120 sizeofIRType(typeOfIRExpr(tyenv, data)),
1121 aexpr, goff_sp );
1122 break;
// Dirty helpers may declare memory effects via mFx/mAddr/mSize.
1125 case Ist_Dirty: {
1126 Int dataSize;
1127 IRDirty* d = st->Ist.Dirty.details;
1128 if (d->mFx != Ifx_None) {
1129 /* This dirty helper accesses memory. Collect the details. */
1130 tl_assert(d->mAddr != NULL);
1131 tl_assert(d->mSize != 0);
1132 dataSize = d->mSize;
1133 // Large (eg. 28B, 108B, 512B on x86) data-sized
1134 // instructions will be done inaccurately, but they're
1135 // very rare and this avoids errors from hitting more
1136 // than two cache lines in the simulation.
1137 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1138 addMemEvent( sbOut, False/*!isWrite*/,
1139 dataSize, d->mAddr, goff_sp );
1140 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1141 addMemEvent( sbOut, True/*isWrite*/,
1142 dataSize, d->mAddr, goff_sp );
1143 } else {
1144 tl_assert(d->mAddr == NULL);
1145 tl_assert(d->mSize == 0);
1147 break;
1150 case Ist_CAS: {
1151 /* We treat it as a read and a write of the location. I
1152 think that is the same behaviour as it was before IRCAS
1153 was introduced, since prior to that point, the Vex
1154 front ends would translate a lock-prefixed instruction
1155 into a (normal) read followed by a (normal) write. */
1156 Int dataSize;
1157 IRCAS* cas = st->Ist.CAS.details;
1158 tl_assert(cas->addr != NULL);
1159 tl_assert(cas->dataLo != NULL);
1160 dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
1161 if (cas->dataHi != NULL)
1162 dataSize *= 2; /* since it's a doubleword-CAS */
1163 addMemEvent( sbOut, False/*!isWrite*/,
1164 dataSize, cas->addr, goff_sp );
1165 addMemEvent( sbOut, True/*isWrite*/,
1166 dataSize, cas->addr, goff_sp );
1167 break;
// Load-linked / store-conditional: LL counts as a read, SC as a write.
1170 case Ist_LLSC: {
1171 IRType dataTy;
1172 if (st->Ist.LLSC.storedata == NULL) {
1173 /* LL */
1174 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
1175 addMemEvent( sbOut, False/*!isWrite*/,
1176 sizeofIRType(dataTy),
1177 st->Ist.LLSC.addr, goff_sp );
1178 } else {
1179 /* SC */
1180 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
1181 addMemEvent( sbOut, True/*isWrite*/,
1182 sizeofIRType(dataTy),
1183 st->Ist.LLSC.addr, goff_sp );
1185 break;
1188 default:
1189 break;
// Always pass the original statement through to the output block.
1192 addStmtToIRSB( sbOut, st );
// Flush any remaining instruction count before the superblock ends.
1195 if (n > 0) {
1196 // Add an increment before the SB end.
1197 add_counter_update(sbOut, n);
1199 return sbOut;
1202 #undef binop
1203 #undef mkexpr
1204 #undef mkU32
1205 #undef mkU64
1206 #undef assign
1208 //------------------------------------------------------------//
1209 //--- Client requests ---//
1210 //------------------------------------------------------------//
1212 static Bool dh_handle_client_request(ThreadId tid, UWord* arg, UWord* ret)
1214 switch (arg[0]) {
1215 case VG_USERREQ__DHAT_AD_HOC_EVENT: {
1216 if (clo_mode != AdHoc) {
1217 return False;
1220 SizeT len = (SizeT)arg[1];
1222 // Only the ec and req_szB fields are used by intro_Block().
1223 Block bk;
1224 VG_(memset)(&bk, 0, sizeof(bk));
1225 bk.req_szB = len;
1226 bk.ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
1228 intro_Block(&bk);
1230 return True;
1233 case VG_USERREQ__DHAT_HISTOGRAM_MEMORY: {
1234 Addr address = (Addr)arg[1];
1236 Block* bk = find_Block_containing( address );
1237 // bogus address
1238 if (!bk) {
1239 VG_(message)(
1240 Vg_UserMsg,
1241 "Warning: address for user histogram request not found %llx\n", (ULong)address
1243 return False;
1246 // already histogrammed
1247 if (bk->req_szB <= HISTOGRAM_SIZE_LIMIT) {
1248 VG_(message)(
1249 Vg_UserMsg,
1250 "Warning: request for user histogram of size %lu is smaller than the normal histogram limit, request ignored\n",
1251 bk->req_szB
1253 return False;
1256 // too big
1257 if (bk->req_szB > USER_HISTOGRAM_SIZE_LIMIT) {
1258 VG_(message)(
1259 Vg_UserMsg,
1260 "Warning: request for user histogram of size %lu is larger than the maximum user request limit, request ignored\n",
1261 bk->req_szB
1263 return False;
1267 bk->histoW = VG_(malloc)("dh.new_block.3", bk->req_szB * sizeof(UShort));
1268 VG_(memset)(bk->histoW, 0, bk->req_szB * sizeof(UShort));
1270 return True;
1273 case _VG_USERREQ__DHAT_COPY: {
1274 SizeT len = (SizeT)arg[1];
1276 if (clo_mode != Copy) {
1277 return False;
1280 // Only the ec and req_szB fields are used by intro_Block().
1281 Block bk;
1282 VG_(memset)(&bk, 0, sizeof(bk));
1283 bk.req_szB = len;
1284 bk.ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
1286 intro_Block(&bk);
1288 return True;
1291 default:
1292 VG_(message)(
1293 Vg_UserMsg,
1294 "Warning: unknown DHAT client request code %llx\n",
1295 (ULong)arg[0]
1297 return False;
1301 //------------------------------------------------------------//
1302 //--- Finalisation ---//
1303 //------------------------------------------------------------//
1305 // File format notes.
1307 // - The files are JSON, because it's a widely-used format and saves us having
1308 // to write a parser in dh_view.js.
1310 // - We use a comma-first style for the generated JSON. Comma-first style
1311 // moves the special case for arrays/objects from the last item to the
1312 // first. This helps in cases where you can't easily tell in advance the
1313 // size of arrays/objects, such as iterating over a WordFM (because
1314 // VG_(sizeFM) is O(n) rather than O(1)), and iterating over stack frames
1315 // using VG_(apply_ExeContext) in combination with an InlIpCursor.
1317 // - We use short field names and minimal whitespace to minimize file sizes.
1319 // Sample output:
1321 // {
1322 // // Version number of the format. Incremented on each
1323 // // backwards-incompatible change. A mandatory integer.
1324 // "dhatFileVersion": 2,
1326 // // The invocation mode. A mandatory, free-form string.
1327 // "mode": "heap",
1329 // // The verb used before above stack frames, i.e. "<verb> at {". A
1330 // // mandatory string.
1331 // "verb": "Allocated",
1333 // // Are block lifetimes recorded? Affects whether some other fields are
1334 // // present. A mandatory boolean.
1335 // "bklt": true,
1337 // // Are block accesses recorded? Affects whether some other fields are
1338 // // present. A mandatory boolean.
1339 // "bkacc": true,
1341 // // Byte/bytes/blocks-position units. Optional strings. "byte", "bytes",
1342 // // and "blocks" are the values used if these fields are omitted.
1343 // "bu": "byte", "bsu": "bytes", "bksu": "blocks",
1345 // // Time units (individual and 1,000,000x). Mandatory strings.
1346 // "tu": "instrs", "Mtu": "Minstr"
1349 //   // The "short-lived" time threshold, measured in "tu"s.
1349 // // - bklt=true: a mandatory integer.
1350 // // - bklt=false: omitted.
1351 // "tuth": 500,
1353 // // The executed command. A mandatory string.
1354 // "cmd": "date",
1356 // // The process ID. A mandatory integer.
1357 //   "pid": 61129,
1359 // // The time at the end of execution (t-end). A mandatory integer.
1360 //   "te": 350682,
1362 // // The time of the global max (t-gmax).
1363 // // - bklt=true: a mandatory integer.
1364 // // - bklt=false: omitted.
1365 // "tg": 331312,
1367 // // The program points. A mandatory array.
1368 // "pps": [
1369 // {
1370 // // Total bytes and blocks. Mandatory integers.
1371 // "tb": 5, "tbk": 1,
1373 // // Total lifetimes of all blocks allocated at this PP.
1374 // // - bklt=true: a mandatory integer.
1375 // // - bklt=false: omitted.
1376 // "tl": 274,
1378 // // The maximum bytes and blocks for this PP.
1379 // // - bklt=true: mandatory integers.
1380 // // - bklt=false: omitted.
1381 // "mb": 5, "mbk": 1,
1383 // // The bytes and blocks at t-gmax for this PP.
1384 // // - bklt=true: mandatory integers.
1385 // // - bklt=false: omitted.
1386 // "gb": 0, "gbk": 0,
1388 // // The bytes and blocks at t-end for this PP.
1389 // // - bklt=true: mandatory integers.
1390 // // - bklt=false: omitted.
1391 // "eb": 0, "ebk": 0,
1393 // // The reads and writes of blocks for this PP.
1394 // // - bkacc=true: mandatory integers.
1395 // // - bkacc=false: omitted.
1396 // "rb": 41, "wb": 5,
1398 // // The exact accesses of blocks for this PP. Only used when all
1399 // // allocations are the same size and sufficiently small. A negative
1400 // // element indicates run-length encoding of the following integer.
1401 // // E.g. `-3, 4` means "three 4s in a row".
1402 // // - bkacc=true: an optional array of integers.
1403 // // - bkacc=false: omitted.
1404 // "acc": [5, -3, 4, 2],
1406 // // Frames. Each element is an index into the "ftbl" array below.
1407 // // - All modes: A mandatory array of integers.
1408 // "fs": [1, 2, 3]
1409 // }
1410 // ],
1412 // // Frame table. A mandatory array of strings.
1413 // "ftbl": [
1414 // "[root]",
1415 // "0x4AA1D9F: _nl_normalize_codeset (l10nflist.c:332)",
1416 // "0x4A9B414: _nl_load_locale_from_archive (loadarchive.c:173)",
1417 // "0x4A9A2BE: _nl_find_locale (findlocale.c:153)"
1418 // ]
1419 // }
1421 static VgFile* fp;
1423 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
1425 // The frame table holds unique frames.
1426 static WordFM* frame_tbl = NULL;
1427 static UWord next_frame_n = 0;
1429 static Word frame_cmp(UWord a, UWord b)
1431 return VG_(strcmp)((const HChar*)a, (const HChar*)b);
1434 static HChar hex_digit_to_ascii_char(UChar d)
1436 d = d & 0xf;
1437 return (d < 10) ? ('0' + d) : ('a' + (d - 10));
1440 // For JSON, we must escape double quote, backslash, and 0x00..0x1f.
1442 // Returns the original string if no escaping was required. Returns a pointer
1443 // to a static buffer if escaping was required. Therefore, the return value is
1444 // only valid until the next call to this function.
1445 static const HChar* json_escape(const HChar* s)
1447 static HChar* buf = NULL;
1448 static SizeT bufcap = 0;
1450 // Do we need any escaping?
1451 SizeT extra = 0;
1452 const HChar* p = s;
1453 while (*p) {
1454 UChar c = *p;
1455 if (c == '"' || c == '\\') {
1456 extra += 1;
1457 } else if (c <= 0x1f) {
1458 extra += 5;
1460 p++;
1462 SizeT len = p - s;
1464 if (extra == 0) {
1465 // No escaping needed.
1466 return s;
1469 // Escaping needed. (The +1 is for the NUL terminator.) Enlarge buf if
1470 // necessary.
1471 SizeT newcap = len + extra + 1;
1472 if (bufcap < newcap) {
1473 buf = VG_(realloc)("dh.json", buf, newcap);
1474 bufcap = newcap;
1477 p = s;
1478 HChar* q = buf;
1479 while (*p) {
1480 UChar c = *p;
1481 if (c == '"') {
1482 *q++ = '\\';
1483 *q++ = '"';
1484 } else if (c == '\\') {
1485 *q++ = '\\';
1486 *q++ = '\\';
1487 } else if (c <= 0x1f) {
1488 *q++ = '\\';
1489 *q++ = 'u';
1490 *q++ = '0';
1491 *q++ = '0';
1492 *q++ = hex_digit_to_ascii_char((c & 0x00f0) >> 4);
1493 *q++ = hex_digit_to_ascii_char(c & 0x000f);
1494 } else {
1495 *q++ = c;
1497 p++;
1499 *q = '\0';
1501 return buf;
// Callback for VG_(apply_ExeContext): prints one stack frame's index into
// the "fs" JSON array, interning the frame's description string in
// `frame_tbl` so each unique frame is numbered exactly once.
// `opaque` points to a Bool tracking whether '[' has been printed yet.
1504 static void write_PPInfo_frame(UInt n, DiEpoch ep, Addr ip, void* opaque)
1506 Bool* is_first = (Bool*)opaque;
1507 InlIPCursor* iipc = VG_(new_IIPC)(ep, ip);
// One IP may expand to several frames when inlining info is available,
// hence the cursor loop.
1509 do {
1510 const HChar* buf = VG_(describe_IP)(ep, ip, iipc);
1512 // Skip entries in vg_replace_malloc.c (e.g. `malloc`, `calloc`,
1513 // `realloc`, `operator new`) because they're boring and clog up the
1514 // output.
1515 if (VG_(strstr)(buf, "vg_replace_malloc.c")) {
// `continue` still advances the cursor via the loop condition below.
1516 continue;
1519 // If this description has been seen before, get its number. Otherwise,
1520 // give it a new number and put it in the table.
1521 UWord keyW = 0, valW = 0;
1522 UWord frame_n = 0;
1523 Bool found = VG_(lookupFM)(frame_tbl, &keyW, &valW, (UWord)buf);
1524 if (found) {
1525 //const HChar* str = (const HChar*)keyW;
1526 //tl_assert(0 == VG_(strcmp)(buf, str));
1527 frame_n = valW;
1528 } else {
1529 // `buf` is a static buffer, we must copy it.
1530 const HChar* str = VG_(strdup)("dh.frame_tbl.3", buf);
1531 frame_n = next_frame_n++;
1532 Bool present = VG_(addToFM)(frame_tbl, (UWord)str, frame_n);
1533 tl_assert(!present);
// Comma-first style: '[' before the first element, ',' before the rest.
1536 FP("%c%lu", *is_first ? '[' : ',', frame_n);
1537 *is_first = False;
1539 } while (VG_(next_IIPC)(iipc));
1541 VG_(delete_IIPC)(iipc);
// Prints one program point's JSON object.  In Heap mode the block-lifetime
// and access fields are emitted; in other modes they must all be zero/NULL
// (asserted below) and are omitted from the output.
1544 static void write_PPInfo(PPInfo* ppi, Bool is_first)
// Comma-first style: the first element opens the enclosing array.
1546 FP(" %c{\"tb\":%llu,\"tbk\":%llu\n",
1547 is_first ? '[' : ',',
1548 ppi->total_bytes, ppi->total_blocks);
1550 if (clo_mode == Heap) {
1551 tl_assert(ppi->total_blocks >= ppi->max_blocks);
1552 tl_assert(ppi->total_bytes >= ppi->max_bytes);
1554 FP(" ,\"tl\":%llu\n",
1555 ppi->total_lifetimes_instrs);
1556 FP(" ,\"mb\":%llu,\"mbk\":%llu\n",
1557 ppi->max_bytes, ppi->max_blocks);
1558 FP(" ,\"gb\":%llu,\"gbk\":%llu\n",
1559 ppi->at_tgmax_bytes, ppi->at_tgmax_blocks);
1560 FP(" ,\"eb\":%llu,\"ebk\":%llu\n",
1561 ppi->curr_bytes, ppi->curr_blocks);
1562 FP(" ,\"rb\":%llu,\"wb\":%llu\n",
1563 ppi->reads_bytes, ppi->writes_bytes);
// The per-element access histogram is only emitted when every block at
// this PP had the same exact size (xsize_tag == Exactly).
1565 if (ppi->histo && ppi->xsize_tag == Exactly) {
1566 FP(" ,\"acc\":[");
1568 // Simple run-length encoding: when N entries in a row have the same
1569 // value M, we print "-N,M". If there is just one in a row, we just
1570 // print "M". This reduces file size significantly.
// reps starts at 0, so the dummy initial "run" of repval==0 prints
// nothing if the first histogram entry differs from 0.
1571 UShort repval = 0;
1572 Int reps = 0;
1573 for (UWord i = 0; i < ppi->xsize; i++) {
1574 UShort h = ppi->histo[i];
1575 if (repval == h) {
1576 // Continue current run.
1577 reps++;
1578 } else {
1579 // End of run; print it.
1580 if (reps == 1) {
1581 FP("%u,", repval);
1582 } else if (reps > 1) {
1583 FP("-%d,%u,", reps, repval);
1585 reps = 1;
1586 repval = h;
1589 // Print the final run.
1590 if (reps == 1) {
1591 FP("%u", repval);
1592 } else if (reps > 1) {
1593 FP("-%d,%u", reps, repval);
1596 FP("]\n");
1598 } else {
// Non-heap modes: all heap-only counters must be untouched.
1599 tl_assert(ppi->curr_bytes == 0);
1600 tl_assert(ppi->curr_blocks == 0);
1601 tl_assert(ppi->max_bytes == 0);
1602 tl_assert(ppi->max_blocks == 0);
1603 tl_assert(ppi->at_tgmax_bytes == 0);
1604 tl_assert(ppi->at_tgmax_blocks == 0);
1605 tl_assert(ppi->total_lifetimes_instrs == 0);
1606 tl_assert(ppi->freed_blocks == 0);
1607 tl_assert(ppi->reads_bytes == 0);
1608 tl_assert(ppi->writes_bytes == 0);
1609 tl_assert(ppi->xsize_tag == 0);
1610 tl_assert(ppi->xsize == 0);
1611 tl_assert(ppi->histo == NULL);
// The allocation stack, as indices into the frame table.
1614 FP(" ,\"fs\":");
1615 Bool is_first_frame = True;
1616 VG_(apply_ExeContext)(write_PPInfo_frame, &is_first_frame, ppi->ec);
1617 FP("]\n");
1619 FP(" }\n");
1622 static void write_PPInfos(void)
1624 UWord keyW, valW;
1626 FP(",\"pps\":\n");
1628 VG_(initIterFM)(ppinfo);
1629 Bool is_first = True;
1630 while (VG_(nextIterFM)(ppinfo, &keyW, &valW)) {
1631 PPInfo* ppi = (PPInfo*)valW;
1632 tl_assert(ppi && ppi->ec == (ExeContext*)keyW);
1633 write_PPInfo(ppi, is_first);
1634 is_first = False;
1636 VG_(doneIterFM)(ppinfo);
1638 if (is_first) {
1639 // We didn't print any elements. This happens if ppinfo is empty.
1640 FP(" [\n");
1643 FP(" ]\n");
// dh_fini: run at client exit.  Retires all still-live blocks so their
// stats are harvested, then writes the whole profile as a JSON file (see
// the file-format notes above) and prints a brief summary to the user.
1646 static void dh_fini(Int exit_status)
1648 // This function does lots of allocations that it doesn't bother to free,
1649 // because execution is almost over anyway.
1651 UWord keyW, valW;
1653 // Total bytes might be at a possible peak.
1654 if (clo_mode == Heap) {
1655 check_for_peak();
1657 // Before printing statistics, we must harvest various stats (such as
1658 // lifetimes and accesses) for all the blocks that are still alive.
1659 VG_(initIterFM)( interval_tree );
1660 while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
1661 Block* bk = (Block*)keyW;
1662 tl_assert(valW == 0);
1663 tl_assert(bk);
1664 retire_Block(bk, False/*!because_freed*/);
1666 VG_(doneIterFM)( interval_tree );
1668 // Stats.
1669 if (VG_(clo_stats)) {
1670 VG_(dmsg)(" dhat: find_Block_containing:\n");
1671 VG_(dmsg)(" dhat: found: %'lu\n",
1672 stats__n_fBc_cached0 + stats__n_fBc_cached1
1673 + stats__n_fBc_cached2
1674 + stats__n_fBc_uncached);
1675 VG_(dmsg)(" dhat: at cache0 %'14lu at cache1 %'14lu\n",
1676 stats__n_fBc_cached0,
1677 stats__n_fBc_cached1);
1678 VG_(dmsg)(" dhat: at cache2 %'14lu uncached %'14lu\n",
1679 stats__n_fBc_cached2,
1680 stats__n_fBc_uncached);
1681 VG_(dmsg)(" dhat: notfound: %'lu\n", stats__n_fBc_notfound);
1682 VG_(dmsg)("\n");
1686 // Create the frame table, and insert the special "[root]" node at index 0.
1687 frame_tbl = VG_(newFM)(VG_(malloc),
1688 "dh.frame_tbl.1",
1689 VG_(free),
1690 frame_cmp);
1691 const HChar* root = VG_(strdup)("dh.frame_tbl.2", "[root]");
1692 Bool present = VG_(addToFM)(frame_tbl, (UWord)root, 0);
1693 tl_assert(!present);
1694 next_frame_n = 1;
1696 // Setup output filename. Nb: it's important to do this now, i.e. as late
1697 // as possible. If we do it at start-up and the program forks and the
1698 // output file format string contains a %p (pid) specifier, both the parent
1699 // and child will incorrectly write to the same file; this happened in
1700 // 3.3.0.
1701 HChar* dhat_out_file =
1702 VG_(expand_file_name)("--dhat-out-file", clo_dhat_out_file);
1704 fp = VG_(fopen)(dhat_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1705 VKI_S_IRUSR|VKI_S_IWUSR);
1706 if (!fp) {
1707 VG_(umsg)("error: can't open DHAT output file '%s'\n", dhat_out_file);
1708 VG_(free)(dhat_out_file);
1709 return;
1712 // Write to data file.
1713 FP("{\"dhatFileVersion\":2\n");
1715 // The output mode, block booleans, and byte/block units.
1716 if (clo_mode == Heap) {
1717 FP(",\"mode\":\"heap\",\"verb\":\"Allocated\"\n");
1718 FP(",\"bklt\":true,\"bkacc\":true\n");
1719 } else if (clo_mode == Copy) {
1720 FP(",\"mode\":\"copy\",\"verb\":\"Copied\"\n");
1721 FP(",\"bklt\":false,\"bkacc\":false\n");
1722 } else if (clo_mode == AdHoc) {
1723 FP(",\"mode\":\"ad-hoc\",\"verb\":\"Occurred\"\n");
1724 FP(",\"bklt\":false,\"bkacc\":false\n");
1725 FP(",\"bu\":\"unit\",\"bsu\":\"units\",\"bksu\":\"events\"\n");
1726 } else {
1727 tl_assert(False);
1730 // The time units.
1731 FP(",\"tu\":\"instrs\",\"Mtu\":\"Minstr\"\n");
1732 if (clo_mode == Heap) {
1733 FP(",\"tuth\":500\n");
1736 // The command.
1737 const HChar* exe = VG_(args_the_exename);
1738 FP(",\"cmd\":\"%s", json_escape(exe));
1739 for (Word i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
1740 const HChar* arg = *(HChar**)VG_(indexXA)(VG_(args_for_client), i);
1741 FP(" %s", json_escape(arg));
1743 FP("\"\n");
1745 // The PID.
1746 FP(",\"pid\":%d\n", VG_(getpid)());
1748 // Times.
1749 FP(",\"te\":%llu\n", g_curr_instrs);
1750 if (clo_mode == Heap) {
1751 FP(",\"tg\":%llu\n", g_tgmax_instrs);
1752 } else {
1753 tl_assert(g_tgmax_instrs == 0);
1756 // APs.
1757 write_PPInfos();
1759 // Frame table.
1760 FP(",\"ftbl\":\n");
1762 // The frame table maps strings to numbers. We want to print it ordered by
1763 // numbers. So we create an array and fill it in from the frame table, then
1764 // print that.
// Every value in frame_tbl is < next_frame_n and unique, so `frames` is
// fully populated by the loop below.
1765 UWord n_frames = next_frame_n;
1766 const HChar** frames =
1767 VG_(malloc)("dh.frames", n_frames * sizeof(const HChar*));
1768 VG_(initIterFM)(frame_tbl);
1769 while (VG_(nextIterFM)(frame_tbl, &keyW, &valW)) {
1770 const HChar* str = (const HChar*)keyW;
1771 UWord n = valW;
1772 frames[n] = str;
1774 VG_(doneIterFM)(frame_tbl);
1776 for (UWord i = 0; i < n_frames; i++) {
1777 FP(" %c\"%s\"\n", i == 0 ? '[' : ',', json_escape(frames[i]));
1779 FP(" ]\n");
1780 VG_(free)(frames);
1782 FP("}\n");
1784 VG_(fclose)(fp);
1785 fp = NULL;
1787 if (VG_(clo_verbosity) == 0) {
1788 return;
1791 // Print brief global stats.
1792 VG_(umsg)("Total: %'llu %s in %'llu %s\n",
1793 g_total_bytes, clo_mode == AdHoc ? "units" : "bytes",
1794 g_total_blocks, clo_mode == AdHoc ? "events" : "blocks");
1795 if (clo_mode == Heap) {
1796 VG_(umsg)("At t-gmax: %'llu bytes in %'llu blocks\n",
1797 g_max_bytes, g_max_blocks);
1798 VG_(umsg)("At t-end: %'llu bytes in %'llu blocks\n",
1799 g_curr_bytes, g_curr_blocks);
1800 VG_(umsg)("Reads: %'llu bytes\n", g_reads_bytes);
1801 VG_(umsg)("Writes: %'llu bytes\n", g_writes_bytes);
1802 } else {
// Non-heap modes never touch the heap-only global counters.
1803 tl_assert(g_max_bytes == 0);
1804 tl_assert(g_max_blocks == 0);
1805 tl_assert(g_curr_bytes == 0);
1806 tl_assert(g_curr_blocks == 0);
1807 tl_assert(g_reads_bytes == 0);
1808 tl_assert(g_writes_bytes == 0);
1811 // Print a how-to-view-the-profile hint.
1812 VG_(umsg)("\n");
1813 VG_(umsg)("To view the resulting profile, open\n");
1814 VG_(umsg)(" file://%s/%s\n", DHAT_VIEW_DIR, "dh_view.html");
1815 VG_(umsg)("in a web browser, click on \"Load...\", "
1816 "and then select the file\n");
1817 VG_(umsg)(" %s\n", dhat_out_file);
1818 VG_(umsg)("The text at the bottom explains the abbreviations used in the "
1819 "output.\n");
1821 VG_(free)(dhat_out_file);
1824 //------------------------------------------------------------//
1825 //--- Initialisation ---//
1826 //------------------------------------------------------------//
1828 static void dh_post_clo_init(void)
1830 if (clo_mode == Heap) {
1831 VG_(track_pre_mem_read) ( dh_handle_noninsn_read );
1832 VG_(track_pre_mem_read_asciiz) ( dh_handle_noninsn_read_asciiz );
1833 VG_(track_post_mem_write) ( dh_handle_noninsn_write );
// dh_pre_clo_init: tool registration, called by the core before command-line
// processing.  Declares DHAT's identity, callbacks, needs, and
// malloc-replacement functions, and creates the two global maps.
1837 static void dh_pre_clo_init(void)
1839 VG_(details_name) ("DHAT");
1840 VG_(details_version) (NULL);
1841 VG_(details_description) ("a dynamic heap analysis tool");
1842 VG_(details_copyright_author)(
1843 "Copyright (C) 2010-2018, and GNU GPL'd, by Mozilla Foundation");
1844 VG_(details_bug_reports_to) (VG_BUGS_TO);
1845 VG_(details_avg_translation_sizeB) ( 600 );
1847 // Basic functions.
1848 VG_(basic_tool_funcs) (dh_post_clo_init,
1849 dh_instrument,
1850 dh_fini);
1852 // Needs.
1853 VG_(needs_libc_freeres)();
1854 VG_(needs_cxx_freeres)();
1855 VG_(needs_command_line_options)(dh_process_cmd_line_option,
1856 dh_print_usage,
1857 dh_print_debug_usage);
1858 VG_(needs_client_requests) (dh_handle_client_request);
1859 // VG_(needs_sanity_checks) (dh_cheap_sanity_check,
1860 // dh_expensive_sanity_check);
1861 VG_(needs_malloc_replacement)(dh_malloc,
1862 dh___builtin_new,
1863 dh___builtin_new_aligned,
1864 dh___builtin_vec_new,
1865 dh___builtin_vec_new_aligned,
1866 dh_memalign,
1867 dh_calloc,
1868 dh_free,
1869 dh___builtin_delete,
1870 dh___builtin_delete_aligned,
1871 dh___builtin_vec_delete,
1872 dh___builtin_vec_delete_aligned,
1873 dh_realloc,
1874 dh_malloc_usable_size,
1875 0 );
// The global structures must not exist yet; they are created exactly once.
1877 tl_assert(!interval_tree);
1878 tl_assert(!fbc_cache0);
1879 tl_assert(!fbc_cache1);
1880 tl_assert(!fbc_cache2);
// Live blocks, keyed by address interval.
1882 interval_tree = VG_(newFM)( VG_(malloc),
1883 "dh.interval_tree.1",
1884 VG_(free),
1885 interval_tree_Cmp );
// Per-program-point statistics, keyed by ExeContext pointer (unboxed cmp).
1887 ppinfo = VG_(newFM)( VG_(malloc),
1888 "dh.ppinfo.1",
1889 VG_(free),
1890 NULL/*unboxedcmp*/ );
1893 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
1895 //--------------------------------------------------------------------//
1896 //--- end dh_main.c ---//
1897 //--------------------------------------------------------------------//