//--------------------------------------------------------------------//
//--------------------------------------------------------------------//
//--- DHAT: a Dynamic Heap Analysis Tool                 dh_main.c ---//
//--------------------------------------------------------------------//

/*
   This file is part of DHAT, a Valgrind tool for profiling the
   heap usage of programs.

   Copyright (C) 2010-2018 Mozilla Foundation

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/

/* Contributed by Julian Seward <jseward@acm.org> */
#include "pub_tool_basics.h"
#include "pub_tool_clientstate.h"
#include "pub_tool_clreq.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcfile.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_libcproc.h"
#include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
#include "pub_tool_mallocfree.h"
#include "pub_tool_options.h"
#include "pub_tool_replacemalloc.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_wordfm.h"

#include "dhat.h"
// Blocks larger than this (in bytes) get no per-byte access histogram.
#define HISTOGRAM_SIZE_LIMIT 1024
49 //------------------------------------------------------------//
50 //--- Globals ---//
51 //------------------------------------------------------------//
53 // Values for the entire run.
54 static ULong g_total_blocks = 0;
55 static ULong g_total_bytes = 0;
57 // Current values. g_curr_blocks and g_curr_bytes are only used with
58 // clo_mode=Heap.
59 static ULong g_curr_blocks = 0;
60 static ULong g_curr_bytes = 0;
61 static ULong g_curr_instrs = 0; // incremented from generated code
63 // Values at the global max, i.e. when g_curr_bytes peaks.
64 // Only used with clo_mode=Heap.
65 static ULong g_max_blocks = 0;
66 static ULong g_max_bytes = 0;
68 // Time of the global max.
69 static ULong g_tgmax_instrs = 0;
71 // Values for the entire run. Updated each time a block is retired.
72 // Only used with clo_mode=Heap.
73 static ULong g_reads_bytes = 0;
74 static ULong g_writes_bytes = 0;
76 //------------------------------------------------------------//
77 //--- Command line args ---//
78 //------------------------------------------------------------//
80 typedef enum { Heap=55, Copy, AdHoc } ProfileKind;
82 static ProfileKind clo_mode = Heap;
84 static const HChar* clo_dhat_out_file = "dhat.out.%p";
86 static Bool dh_process_cmd_line_option(const HChar* arg)
88 if VG_STR_CLO(arg, "--dhat-out-file", clo_dhat_out_file) {
90 } else if (VG_XACT_CLO(arg, "--mode=heap", clo_mode, Heap)) {
91 } else if (VG_XACT_CLO(arg, "--mode=copy", clo_mode, Copy)) {
92 } else if (VG_XACT_CLO(arg, "--mode=ad-hoc", clo_mode, AdHoc)) {
94 } else {
95 return VG_(replacement_malloc_process_cmd_line_option)(arg);
98 return True;
101 static void dh_print_usage(void)
103 VG_(printf)(
104 " --dhat-out-file=<file> output file name [dhat.out.%%p]\n"
105 " --mode=heap|copy|ad-hoc profiling mode\n"
109 static void dh_print_debug_usage(void)
111 VG_(printf)(
112 " (none)\n"
116 //------------------------------------------------------------//
117 //--- an Interval Tree of live blocks ---//
118 //------------------------------------------------------------//
120 /* Tracks information about live blocks. */
121 typedef
122 struct {
123 Addr payload;
124 SizeT req_szB;
125 ExeContext* ec; /* allocation ec */
126 ULong allocd_at; /* instruction number */
127 ULong reads_bytes;
128 ULong writes_bytes;
129 /* Approx histogram, one byte per payload byte. Counts latch up
130 therefore at 0xFFFF. Can be NULL if the block is resized or if
131 the block is larger than HISTOGRAM_SIZE_LIMIT. */
132 UShort* histoW; /* [0 .. req_szB-1] */
134 Block;
136 /* May not contain zero-sized blocks. May not contain
137 overlapping blocks. */
138 static WordFM* interval_tree = NULL; /* WordFM* Block* void */
140 /* Here's the comparison function. Since the tree is required
141 to contain non-zero sized, non-overlapping blocks, it's good
142 enough to consider any overlap as a match. */
143 static Word interval_tree_Cmp ( UWord k1, UWord k2 )
145 Block* b1 = (Block*)k1;
146 Block* b2 = (Block*)k2;
147 tl_assert(b1->req_szB > 0);
148 tl_assert(b2->req_szB > 0);
149 if (b1->payload + b1->req_szB <= b2->payload) return -1;
150 if (b2->payload + b2->req_szB <= b1->payload) return 1;
151 return 0;
154 // 3-entry cache for find_Block_containing
155 static Block* fbc_cache0 = NULL;
156 static Block* fbc_cache1 = NULL;
157 static Block* fbc_cache2 = NULL;
159 static UWord stats__n_fBc_cached0 = 0;
160 static UWord stats__n_fBc_cached1 = 0;
161 static UWord stats__n_fBc_cached2 = 0;
162 static UWord stats__n_fBc_uncached = 0;
163 static UWord stats__n_fBc_notfound = 0;
165 static Block* find_Block_containing ( Addr a )
167 tl_assert(clo_mode == Heap);
169 if (LIKELY(fbc_cache0
170 && fbc_cache0->payload <= a
171 && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
172 // found at 0
173 stats__n_fBc_cached0++;
174 return fbc_cache0;
176 if (LIKELY(fbc_cache1
177 && fbc_cache1->payload <= a
178 && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
179 // found at 1; swap 0 and 1
180 Block* tmp = fbc_cache1;
181 fbc_cache1 = fbc_cache0;
182 fbc_cache0 = tmp;
183 stats__n_fBc_cached1++;
184 return tmp;
186 if (LIKELY(fbc_cache2
187 && fbc_cache2->payload <= a
188 && a < fbc_cache2->payload + fbc_cache2->req_szB)) {
189 // found at 2; swap 1 and 2
190 Block* tmp = fbc_cache2;
191 fbc_cache2 = fbc_cache1;
192 fbc_cache1 = tmp;
193 stats__n_fBc_cached2++;
194 return tmp;
197 Block fake;
198 fake.payload = a;
199 fake.req_szB = 1;
200 UWord foundkey = 1;
201 UWord foundval = 1;
202 Bool found = VG_(lookupFM)( interval_tree,
203 &foundkey, &foundval, (UWord)&fake );
204 if (!found) {
205 stats__n_fBc_notfound++;
206 return NULL;
208 tl_assert(foundval == 0); // we don't store vals in the interval tree
209 tl_assert(foundkey != 1);
210 Block* res = (Block*)foundkey;
211 tl_assert(res != &fake);
212 // put at the top position
213 fbc_cache2 = fbc_cache1;
214 fbc_cache1 = fbc_cache0;
215 fbc_cache0 = res;
216 stats__n_fBc_uncached++;
217 return res;
220 // delete a block; asserts if not found. (viz, 'a' must be
221 // known to be present.)
222 static void delete_Block_starting_at ( Addr a )
224 tl_assert(clo_mode == Heap);
226 Block fake;
227 fake.payload = a;
228 fake.req_szB = 1;
229 Bool found = VG_(delFromFM)( interval_tree,
230 NULL, NULL, (Addr)&fake );
231 tl_assert(found);
232 fbc_cache0 = fbc_cache1 = fbc_cache2 = NULL;
235 //------------------------------------------------------------//
236 //--- a FM of allocation points (APs) ---//
237 //------------------------------------------------------------//
239 typedef
240 struct {
241 // The program point that we're summarising stats for.
242 ExeContext* ec;
244 // Total number of blocks and bytes allocated by this PP.
245 ULong total_blocks;
246 ULong total_bytes;
248 // The current number of blocks and bytes live for this PP.
249 // Only used with clo_mode=Heap.
250 ULong curr_blocks;
251 ULong curr_bytes;
253 // Values at the PP max, i.e. when this PP's curr_bytes peaks.
254 // Only used with clo_mode=Heap.
255 ULong max_blocks; // Blocks at the PP max.
256 ULong max_bytes; // The PP max, measured in bytes.
258 // Values at the global max.
259 // Only used with clo_mode=Heap.
260 ULong at_tgmax_blocks;
261 ULong at_tgmax_bytes;
263 // Total lifetimes of all blocks allocated by this PP. Includes blocks
264 // explicitly freed and blocks implicitly freed at termination.
265 // Only used with clo_mode=Heap.
266 ULong total_lifetimes_instrs;
268 // Number of blocks freed by this PP. (Only used in assertions.)
269 // Only used with clo_mode=Heap.
270 ULong freed_blocks;
272 // Total number of reads and writes in all blocks allocated
273 // by this PP. Only used with clo_mode=Heap.
274 ULong reads_bytes;
275 ULong writes_bytes;
277 /* Histogram information. We maintain a histogram aggregated for
278 all retiring Blocks allocated by this PP, but only if:
279 - this PP has only ever allocated objects of one size
280 - that size is <= HISTOGRAM_SIZE_LIMIT
281 What we need therefore is a mechanism to see if this PP
282 has only ever allocated blocks of one size.
284 3 states:
285 Unknown because no retirement yet
286 Exactly xsize all retiring blocks are of this size
287 Mixed multiple different sizes seen
289 Only used with clo_mode=Heap.
291 enum { Unknown=999, Exactly, Mixed } xsize_tag;
292 SizeT xsize;
293 UInt* histo; /* [0 .. xsize-1] */
295 PPInfo;
297 /* maps ExeContext*'s to PPInfo*'s. Note that the keys must match the
298 .ec field in the values. */
299 static WordFM* ppinfo = NULL; /* WordFM* ExeContext* PPInfo* */
301 // Are we at peak memory? If so, update at_tgmax_blocks and at_tgmax_bytes in
302 // all PPInfos. Note that this is moderately expensive so we avoid calling it
303 // on every allocation.
304 static void check_for_peak(void)
306 tl_assert(clo_mode == Heap);
308 if (g_curr_bytes == g_max_bytes) {
309 // It's a peak. (If there are multiple equal peaks we record the latest
310 // one.)
311 UWord keyW, valW;
312 VG_(initIterFM)(ppinfo);
313 while (VG_(nextIterFM)(ppinfo, &keyW, &valW)) {
314 PPInfo* ppi = (PPInfo*)valW;
315 tl_assert(ppi && ppi->ec == (ExeContext*)keyW);
316 ppi->at_tgmax_blocks = ppi->curr_blocks;
317 ppi->at_tgmax_bytes = ppi->curr_bytes;
319 VG_(doneIterFM)(ppinfo);
323 /* 'bk' is being introduced (has just been allocated). Find the
324 relevant PPInfo entry for it, or create one, based on the block's
325 allocation EC. Then, update the PPInfo to the extent that we
326 actually can, to reflect the allocation. */
327 static void intro_Block(Block* bk)
329 tl_assert(bk);
330 tl_assert(bk->ec);
332 PPInfo* ppi = NULL;
333 UWord keyW = 0;
334 UWord valW = 0;
335 Bool found = VG_(lookupFM)( ppinfo,
336 &keyW, &valW, (UWord)bk->ec );
337 if (found) {
338 ppi = (PPInfo*)valW;
339 tl_assert(keyW == (UWord)bk->ec);
340 } else {
341 ppi = VG_(malloc)( "dh.intro_Block.1", sizeof(PPInfo) );
342 VG_(memset)(ppi, 0, sizeof(*ppi));
343 ppi->ec = bk->ec;
344 Bool present = VG_(addToFM)( ppinfo,
345 (UWord)bk->ec, (UWord)ppi );
346 tl_assert(!present);
347 if (clo_mode == Heap) {
348 // histo stuff
349 tl_assert(ppi->freed_blocks == 0);
350 ppi->xsize_tag = Unknown;
351 ppi->xsize = 0;
352 if (0) VG_(printf)("ppi %p --> Unknown\n", ppi);
356 tl_assert(ppi->ec == bk->ec);
358 // Update global stats and PPInfo stats.
360 g_total_blocks++;
361 g_total_bytes += bk->req_szB;
363 ppi->total_blocks++;
364 ppi->total_bytes += bk->req_szB;
366 if (clo_mode == Heap) {
367 g_curr_blocks++;
368 g_curr_bytes += bk->req_szB;
370 ppi->curr_blocks++;
371 ppi->curr_bytes += bk->req_szB;
373 // The use of `>=` rather than `>` means that if there are multiple equal
374 // peaks we record the latest one, like `check_for_peak` does.
375 if (g_curr_bytes >= g_max_bytes) {
376 g_max_blocks = g_curr_blocks;
377 g_max_bytes = g_curr_bytes;
378 g_tgmax_instrs = g_curr_instrs;
380 ppi->max_blocks = ppi->curr_blocks;
381 ppi->max_bytes = ppi->curr_bytes;
386 /* 'bk' is retiring (being freed). Find the relevant PPInfo entry for
387 it, which must already exist. Then, fold info from 'bk' into that
388 entry. 'because_freed' is True if the block is retiring because
389 the client has freed it. If it is False then the block is retiring
390 because the program has finished, in which case we want to skip the
391 updates of the total blocks live etc for this PP, but still fold in
392 the access counts and histo data that have so far accumulated for
393 the block. */
394 static void retire_Block(Block* bk, Bool because_freed)
396 tl_assert(clo_mode == Heap);
397 tl_assert(bk);
398 tl_assert(bk->ec);
400 PPInfo* ppi = NULL;
401 UWord keyW = 0;
402 UWord valW = 0;
403 Bool found = VG_(lookupFM)( ppinfo,
404 &keyW, &valW, (UWord)bk->ec );
405 tl_assert(found);
406 ppi = (PPInfo*)valW;
407 tl_assert(ppi->ec == bk->ec);
409 // update stats following this free.
410 if (0)
411 VG_(printf)("ec %p ppi->c_by_l %llu bk->rszB %llu\n",
412 bk->ec, ppi->curr_bytes, (ULong)bk->req_szB);
414 if (because_freed) {
415 // Total bytes is coming down from a possible peak.
416 check_for_peak();
418 // Then update global stats.
419 tl_assert(g_curr_blocks >= 1);
420 tl_assert(g_curr_bytes >= bk->req_szB);
421 g_curr_blocks--;
422 g_curr_bytes -= bk->req_szB;
424 // Then update PPInfo stats.
425 tl_assert(ppi->curr_blocks >= 1);
426 tl_assert(ppi->curr_bytes >= bk->req_szB);
427 ppi->curr_blocks--;
428 ppi->curr_bytes -= bk->req_szB;
430 ppi->freed_blocks++;
433 tl_assert(bk->allocd_at <= g_curr_instrs);
434 ppi->total_lifetimes_instrs += (g_curr_instrs - bk->allocd_at);
436 // access counts
437 ppi->reads_bytes += bk->reads_bytes;
438 ppi->writes_bytes += bk->writes_bytes;
439 g_reads_bytes += bk->reads_bytes;
440 g_writes_bytes += bk->writes_bytes;
442 // histo stuff. First, do state transitions for xsize/xsize_tag.
443 switch (ppi->xsize_tag) {
445 case Unknown:
446 tl_assert(ppi->xsize == 0);
447 tl_assert(ppi->freed_blocks == 1 || ppi->freed_blocks == 0);
448 tl_assert(!ppi->histo);
449 ppi->xsize_tag = Exactly;
450 ppi->xsize = bk->req_szB;
451 if (0) VG_(printf)("ppi %p --> Exactly(%lu)\n", ppi, ppi->xsize);
452 // and allocate the histo
453 if (bk->histoW) {
454 ppi->histo = VG_(malloc)("dh.retire_Block.1",
455 ppi->xsize * sizeof(UInt));
456 VG_(memset)(ppi->histo, 0, ppi->xsize * sizeof(UInt));
458 break;
460 case Exactly:
461 //tl_assert(ppi->freed_blocks > 1);
462 if (bk->req_szB != ppi->xsize) {
463 if (0) VG_(printf)("ppi %p --> Mixed(%lu -> %lu)\n",
464 ppi, ppi->xsize, bk->req_szB);
465 ppi->xsize_tag = Mixed;
466 ppi->xsize = 0;
467 // deallocate the histo, if any
468 if (ppi->histo) {
469 VG_(free)(ppi->histo);
470 ppi->histo = NULL;
473 break;
475 case Mixed:
476 //tl_assert(ppi->freed_blocks > 1);
477 break;
479 default:
480 tl_assert(0);
483 // See if we can fold the histo data from this block into
484 // the data for the PP.
485 if (ppi->xsize_tag == Exactly && ppi->histo && bk->histoW) {
486 tl_assert(ppi->xsize == bk->req_szB);
487 UWord i;
488 for (i = 0; i < ppi->xsize; i++) {
489 // FIXME: do something better in case of overflow of ppi->histo[..]
490 // Right now, at least don't let it overflow/wrap around
491 if (ppi->histo[i] <= 0xFFFE0000)
492 ppi->histo[i] += (UInt)bk->histoW[i];
494 if (0) VG_(printf)("fold in, PP = %p\n", ppi);
497 #if 0
498 if (bk->histoB) {
499 VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
500 UWord i;
501 for (i = 0; i < bk->req_szB; i++)
502 VG_(printf)("%u ", (UInt)bk->histoB[i]);
503 VG_(printf)("\n");
504 } else {
505 VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
507 #endif
510 /* This handles block resizing. When a block with PP 'ec' has a
511 size change of 'delta', call here to update the PPInfo. */
512 static void resize_Block(ExeContext* ec, SizeT old_req_szB, SizeT new_req_szB)
514 tl_assert(clo_mode == Heap);
516 Long delta = (Long)new_req_szB - (Long)old_req_szB;
517 PPInfo* ppi = NULL;
518 UWord keyW = 0;
519 UWord valW = 0;
520 Bool found = VG_(lookupFM)( ppinfo,
521 &keyW, &valW, (UWord)ec );
523 tl_assert(found);
524 ppi = (PPInfo*)valW;
525 tl_assert(ppi->ec == ec);
527 if (delta < 0) {
528 tl_assert(ppi->curr_bytes >= -delta);
529 tl_assert(g_curr_bytes >= -delta);
531 // Total bytes might be coming down from a possible peak.
532 check_for_peak();
535 // Note: we treat realloc() like malloc() + free() for total counts, i.e. we
536 // increment total_blocks by 1 and increment total_bytes by new_req_szB.
538 // A reasonable alternative would be to leave total_blocks unchanged and
539 // increment total_bytes by delta (but only if delta is positive). But then
540 // calls to realloc wouldn't be counted towards the total_blocks count,
541 // which is undesirable.
543 // Update global stats and PPInfo stats.
545 g_total_blocks++;
546 g_total_bytes += new_req_szB;
548 ppi->total_blocks++;
549 ppi->total_bytes += new_req_szB;
551 g_curr_blocks += 0; // unchanged
552 g_curr_bytes += delta;
554 ppi->curr_blocks += 0; // unchanged
555 ppi->curr_bytes += delta;
557 // The use of `>=` rather than `>` means that if there are multiple equal
558 // peaks we record the latest one, like `check_for_peak` does.
559 if (g_curr_bytes >= g_max_bytes) {
560 g_max_blocks = g_curr_blocks;
561 g_max_bytes = g_curr_bytes;
562 g_tgmax_instrs = g_curr_instrs;
564 ppi->max_blocks = ppi->curr_blocks;
565 ppi->max_bytes = ppi->curr_bytes;
569 //------------------------------------------------------------//
570 //--- update both Block and PPInfos after {m,re}alloc/free ---//
571 //------------------------------------------------------------//
573 static
574 void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
575 Bool is_zeroed )
577 tl_assert(p == NULL); // don't handle custom allocators right now
578 SizeT actual_szB;
580 if ((SSizeT)req_szB < 0) return NULL;
582 if (req_szB == 0) {
583 req_szB = 1; /* can't allow zero-sized blocks in the interval tree */
586 // Allocate and zero if necessary
587 if (!p) {
588 p = VG_(cli_malloc)( req_alignB, req_szB );
589 if (!p) {
590 return NULL;
592 if (is_zeroed) VG_(memset)(p, 0, req_szB);
593 actual_szB = VG_(cli_malloc_usable_size)(p);
594 tl_assert(actual_szB >= req_szB);
597 if (clo_mode != Heap) {
598 return p;
601 // Make new Block, add to interval_tree.
602 Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
603 bk->payload = (Addr)p;
604 bk->req_szB = req_szB;
605 bk->ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
606 bk->allocd_at = g_curr_instrs;
607 bk->reads_bytes = 0;
608 bk->writes_bytes = 0;
609 // Set up histogram array, if the block isn't too large.
610 bk->histoW = NULL;
611 if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
612 bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
613 VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
616 Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
617 tl_assert(!present);
618 fbc_cache0 = fbc_cache1 = fbc_cache2 = NULL;
620 intro_Block(bk);
622 return p;
625 static
626 void die_block ( void* p )
628 VG_(cli_free)(p);
630 if (clo_mode != Heap) {
631 return;
634 Block* bk = find_Block_containing( (Addr)p );
635 if (!bk) {
636 return; // bogus free
639 tl_assert(bk->req_szB > 0);
640 // assert the block finder is behaving sanely
641 tl_assert(bk->payload <= (Addr)p);
642 tl_assert( (Addr)p < bk->payload + bk->req_szB );
644 if (bk->payload != (Addr)p) {
645 return; // bogus free
648 retire_Block(bk, True/*because_freed*/);
650 delete_Block_starting_at( bk->payload );
651 if (bk->histoW) {
652 VG_(free)( bk->histoW );
653 bk->histoW = NULL;
655 VG_(free)( bk );
658 static
659 void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
661 void* p_new = NULL;
663 tl_assert(new_req_szB > 0); // map 0 to 1
665 if (clo_mode != Heap) {
666 SizeT old_actual_szB = VG_(cli_malloc_usable_size)(p_old);
667 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
668 if (!p_new) {
669 return NULL;
671 VG_(memmove)(p_new, p_old, VG_MIN(old_actual_szB, new_req_szB));
672 VG_(cli_free)(p_old);
673 return p_new;
676 // Find the old block.
677 Block* bk = find_Block_containing( (Addr)p_old );
678 if (!bk) {
679 return NULL; // bogus realloc
682 tl_assert(bk->req_szB > 0);
683 // Assert the block finder is behaving sanely.
684 tl_assert(bk->payload <= (Addr)p_old);
685 tl_assert( (Addr)p_old < bk->payload + bk->req_szB );
687 if (bk->payload != (Addr)p_old) {
688 return NULL; // bogus realloc
691 // Keeping the histogram alive in any meaningful way across
692 // block resizing is too darn complicated. Just throw it away.
693 if (bk->histoW) {
694 VG_(free)(bk->histoW);
695 bk->histoW = NULL;
698 // Actually do the allocation, if necessary.
699 if (new_req_szB <= bk->req_szB) {
700 // New size is smaller or same; block not moved.
701 resize_Block(bk->ec, bk->req_szB, new_req_szB);
702 bk->req_szB = new_req_szB;
704 // Update reads/writes for the implicit copy. Even though we didn't
705 // actually do a copy, we act like we did, to match up with the fact
706 // that we treat this as an additional allocation.
707 bk->reads_bytes += new_req_szB;
708 bk->writes_bytes += new_req_szB;
710 p_new = p_old;
712 } else {
713 // New size is bigger; make new block, copy shared contents, free old.
714 p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
715 if (!p_new) {
716 // Nb: if realloc fails, NULL is returned but the old block is not
717 // touched. What an awful function.
718 return NULL;
720 tl_assert(p_new != p_old);
722 VG_(memcpy)(p_new, p_old, bk->req_szB);
723 VG_(cli_free)(p_old);
725 // Since the block has moved, we need to re-insert it into the
726 // interval tree at the new place. Do this by removing
727 // and re-adding it.
728 delete_Block_starting_at( (Addr)p_old );
729 // Now 'bk' is no longer in the tree, but the Block itself
730 // is still alive.
732 // Update reads/writes for the copy.
733 bk->reads_bytes += bk->req_szB;
734 bk->writes_bytes += bk->req_szB;
736 // Update the metadata.
737 resize_Block(bk->ec, bk->req_szB, new_req_szB);
738 bk->payload = (Addr)p_new;
739 bk->req_szB = new_req_szB;
741 // And re-add it to the interval tree.
742 Bool present
743 = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
744 tl_assert(!present);
745 fbc_cache0 = fbc_cache1 = fbc_cache2 = NULL;
748 return p_new;
751 //------------------------------------------------------------//
752 //--- malloc() et al replacement wrappers ---//
753 //------------------------------------------------------------//
755 static void* dh_malloc ( ThreadId tid, SizeT szB )
757 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
760 static void* dh___builtin_new ( ThreadId tid, SizeT szB )
762 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
765 static void* dh___builtin_new_aligned ( ThreadId tid, SizeT szB, SizeT alignB )
767 return new_block( tid, NULL, szB, alignB, /*is_zeroed*/False );
770 static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
772 return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
775 static void* dh___builtin_vec_new_aligned ( ThreadId tid, SizeT szB, SizeT alignB )
777 return new_block( tid, NULL, szB, alignB, /*is_zeroed*/False );
780 static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
782 return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
785 static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
787 return new_block( tid, NULL, szB, alignB, False );
790 static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
792 die_block(p);
795 static void dh___builtin_delete ( ThreadId tid, void* p )
797 die_block(p);
800 static void dh___builtin_delete_aligned ( ThreadId tid, void* p, SizeT align )
802 die_block(p);
805 static void dh___builtin_vec_delete ( ThreadId tid, void* p )
807 die_block(p);
810 static void dh___builtin_vec_delete_aligned ( ThreadId tid, void* p, SizeT align )
812 die_block(p);
815 static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
817 if (p_old == NULL) {
818 return dh_malloc(tid, new_szB);
820 if (new_szB == 0) {
821 if (VG_(clo_realloc_zero_bytes_frees) == True) {
822 dh_free(tid, p_old);
823 return NULL;
825 new_szB = 1;
827 return renew_block(tid, p_old, new_szB);
830 static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
832 if (clo_mode != Heap) {
833 return VG_(cli_malloc_usable_size)(p);
836 Block* bk = find_Block_containing( (Addr)p );
837 return bk ? bk->req_szB : 0;
840 //------------------------------------------------------------//
841 //--- memory references ---//
842 //------------------------------------------------------------//
844 static
845 void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
847 UWord i, offMin, offMax1;
848 offMin = addr - bk->payload;
849 tl_assert(offMin < bk->req_szB);
850 offMax1 = offMin + szB;
851 if (offMax1 > bk->req_szB)
852 offMax1 = bk->req_szB;
853 //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB);
854 for (i = offMin; i < offMax1; i++) {
855 UShort n = bk->histoW[i];
856 if (n < 0xFFFF) n++;
857 bk->histoW[i] = n;
861 static VG_REGPARM(2)
862 void dh_handle_write ( Addr addr, UWord szB )
864 tl_assert(clo_mode == Heap);
866 Block* bk = find_Block_containing(addr);
867 if (bk) {
868 bk->writes_bytes += szB;
869 if (bk->histoW)
870 inc_histo_for_block(bk, addr, szB);
874 static VG_REGPARM(2)
875 void dh_handle_read ( Addr addr, UWord szB )
877 tl_assert(clo_mode == Heap);
879 Block* bk = find_Block_containing(addr);
880 if (bk) {
881 bk->reads_bytes += szB;
882 if (bk->histoW)
883 inc_histo_for_block(bk, addr, szB);
887 // Handle reads and writes by syscalls (read == kernel
888 // reads user space, write == kernel writes user space).
889 // Assumes no such read or write spans a heap block
890 // boundary and so we can treat it just as one giant
891 // read or write.
892 static
893 void dh_handle_noninsn_read ( CorePart part, ThreadId tid, const HChar* s,
894 Addr base, SizeT size )
896 tl_assert(clo_mode == Heap);
898 switch (part) {
899 case Vg_CoreSysCall:
900 dh_handle_read(base, size);
901 break;
902 case Vg_CoreSysCallArgInMem:
903 break;
904 case Vg_CoreTranslate:
905 break;
906 default:
907 tl_assert(0);
911 static
912 void dh_handle_noninsn_read_asciiz(CorePart part, ThreadId tid, const HChar* s,
913 Addr str)
915 tl_assert(clo_mode == Heap);
917 tl_assert(part == Vg_CoreSysCall);
918 dh_handle_noninsn_read(part, tid, s, str, VG_(strlen)((const HChar*)str+1));
921 static
922 void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
923 Addr base, SizeT size )
925 tl_assert(clo_mode == Heap);
927 switch (part) {
928 case Vg_CoreSysCall:
929 case Vg_CoreClientReq:
930 dh_handle_write(base, size);
931 break;
932 case Vg_CoreSignal:
933 break;
934 default:
935 tl_assert(0);
939 //------------------------------------------------------------//
940 //--- Instrumentation ---//
941 //------------------------------------------------------------//
// Shorthands for building VEX IR expressions/statements.
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define assign(_t, _e)           IRStmt_WrTmp((_t), (_e))
949 static
950 void add_counter_update(IRSB* sbOut, Int n)
952 #if defined(VG_BIGENDIAN)
953 # define END Iend_BE
954 #elif defined(VG_LITTLEENDIAN)
955 # define END Iend_LE
956 #else
957 # error "Unknown endianness"
958 #endif
959 // Add code to increment 'g_curr_instrs' by 'n', like this:
960 // WrTmp(t1, Load64(&g_curr_instrs))
961 // WrTmp(t2, Add64(RdTmp(t1), Const(n)))
962 // Store(&g_curr_instrs, t2)
963 IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
964 IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
965 IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_curr_instrs );
967 IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
968 IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
969 IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));
971 addStmtToIRSB( sbOut, st1 );
972 addStmtToIRSB( sbOut, st2 );
973 addStmtToIRSB( sbOut, st3 );
976 static
977 void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
978 Int goff_sp)
980 if (clo_mode != Heap) {
981 return;
984 IRType tyAddr = Ity_INVALID;
985 const HChar* hName= NULL;
986 void* hAddr = NULL;
987 IRExpr** argv = NULL;
988 IRDirty* di = NULL;
990 const Int THRESH = 4096 * 4; // somewhat arbitrary
991 const Int rz_szB = VG_STACK_REDZONE_SZB;
993 tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
994 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
996 if (isWrite) {
997 hName = "dh_handle_write";
998 hAddr = &dh_handle_write;
999 } else {
1000 hName = "dh_handle_read";
1001 hAddr = &dh_handle_read;
1004 argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );
1006 /* Add the helper. */
1007 tl_assert(hName);
1008 tl_assert(hAddr);
1009 tl_assert(argv);
1010 di = unsafeIRDirty_0_N( 2/*regparms*/,
1011 hName, VG_(fnptr_to_fnentry)( hAddr ),
1012 argv );
1014 /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
1015 some arbitrary N. If that fails then addr is in the range (SP -
1016 RZ .. SP + N - RZ). If N is smallish (a page?) then we can say
1017 addr is within a page of SP and so can't possibly be a heap
1018 access, and so can be skipped. */
1019 IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
1020 addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));
1022 IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
1023 addStmtToIRSB(
1024 sbOut,
1025 assign(sp_minus_rz,
1026 tyAddr == Ity_I32
1027 ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
1028 : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
1031 IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
1032 addStmtToIRSB(
1033 sbOut,
1034 assign(diff,
1035 tyAddr == Ity_I32
1036 ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
1037 : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
1040 IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
1041 addStmtToIRSB(
1042 sbOut,
1043 assign(guard,
1044 tyAddr == Ity_I32
1045 ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
1046 : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
1048 di->guard = mkexpr(guard);
1050 addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
// Instrumentation pass.  Produces a copy of 'sbIn' augmented with:
// - updates of the global instruction counter (add_counter_update), emitted
//   just before every Ist_Exit and before the superblock's end;
// - a memory-event call (addMemEvent) for each load, store, memory-touching
//   dirty helper, CAS and LL/SC, recording read-vs-write and the number of
//   bytes touched.
static
IRSB* dh_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      const VexGuestLayout* layout,
                      const VexGuestExtents* vge,
                      const VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int   i, n = 0;              // 'n': IMarks seen but not yet counted
   IRSB* sbOut;
   IRTypeEnv* tyenv = sbIn->tyenv;

   // Offset of the stack pointer in the guest state; handed through to
   // addMemEvent along with every memory event.
   const Int goff_sp = layout->offset_SP;

   // We increment the instruction count in two places:
   // - just before any Ist_Exit statements;
   // - just before the IRSB's end.
   // In the former case, we zero 'n' and then continue instrumenting.

   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];

      if (!st || st->tag == Ist_NoOp) continue;

      switch (st->tag) {

         case Ist_IMark: {
            // One more guest instruction in the current run.
            n++;
            break;
         }

         case Ist_Exit: {
            if (n > 0) {
               // Add an increment before the Exit statement, then reset 'n'.
               add_counter_update(sbOut, n);
               n = 0;
            }
            break;
         }

         case Ist_WrTmp: {
            // A load shows up as a WrTmp whose RHS is an Iex_Load.
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(data->Iex.Load.ty),
                            aexpr, goff_sp );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data  = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addMemEvent( sbOut, True/*isWrite*/,
                         sizeofIRType(typeOfIRExpr(tyenv, data)),
                         aexpr, goff_sp );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, False/*!isWrite*/,
                               dataSize, d->mAddr, goff_sp );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, True/*isWrite*/,
                               dataSize, d->mAddr, goff_sp );
            } else {
               // A dirty helper declaring no memory effects must carry no
               // address or size either.
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            addMemEvent( sbOut, False/*!isWrite*/,
                         dataSize, cas->addr, goff_sp );
            addMemEvent( sbOut, True/*isWrite*/,
                         dataSize, cas->addr, goff_sp );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL: behaves as a load of the result's type. */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            } else {
               /* SC: behaves as a store of the stored data's type. */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addMemEvent( sbOut, True/*isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            }
            break;
         }

         default:
            break;
      }

      // Always pass the original statement through to the output SB.
      addStmtToIRSB( sbOut, st );
   }

   if (n > 0) {
      // Add an increment before the SB end.
      add_counter_update(sbOut, n);
   }

   return sbOut;
}
1201 #undef binop
1202 #undef mkexpr
1203 #undef mkU32
1204 #undef mkU64
1205 #undef assign
1207 //------------------------------------------------------------//
1208 //--- Client requests ---//
1209 //------------------------------------------------------------//
1211 static Bool dh_handle_client_request(ThreadId tid, UWord* arg, UWord* ret)
1213 switch (arg[0]) {
1214 case VG_USERREQ__DHAT_AD_HOC_EVENT: {
1215 if (clo_mode != AdHoc) {
1216 return False;
1219 SizeT len = (SizeT)arg[1];
1221 // Only the ec and req_szB fields are used by intro_Block().
1222 Block bk;
1223 VG_(memset)(&bk, 0, sizeof(bk));
1224 bk.req_szB = len;
1225 bk.ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
1227 intro_Block(&bk);
1229 return True;
1232 case _VG_USERREQ__DHAT_COPY: {
1233 SizeT len = (SizeT)arg[1];
1235 if (clo_mode != Copy) {
1236 return False;
1239 // Only the ec and req_szB fields are used by intro_Block().
1240 Block bk;
1241 VG_(memset)(&bk, 0, sizeof(bk));
1242 bk.req_szB = len;
1243 bk.ec = VG_(record_ExeContext)(tid, 0/*first word delta*/);
1245 intro_Block(&bk);
1247 return True;
1250 default:
1251 VG_(message)(
1252 Vg_UserMsg,
1253 "Warning: unknown DHAT client request code %llx\n",
1254 (ULong)arg[0]
1256 return False;
1260 //------------------------------------------------------------//
1261 //--- Finalisation ---//
1262 //------------------------------------------------------------//
1264 // File format notes.
1266 // - The files are JSON, because it's a widely-used format and saves us having
1267 // to write a parser in dh_view.js.
1269 // - We use a comma-first style for the generated JSON. Comma-first style
1270 // moves the special case for arrays/objects from the last item to the
1271 // first. This helps in cases where you can't easily tell in advance the
1272 // size of arrays/objects, such as iterating over a WordFM (because
1273 // VG_(sizeFM) is O(n) rather than O(1)), and iterating over stack frames
1274 // using VG_(apply_ExeContext) in combination with an InlIpCursor.
1276 // - We use short field names and minimal whitespace to minimize file sizes.
1278 // Sample output:
1280 // {
1281 // // Version number of the format. Incremented on each
1282 // // backwards-incompatible change. A mandatory integer.
1283 // "dhatFileVersion": 2,
1285 // // The invocation mode. A mandatory, free-form string.
1286 // "mode": "heap",
1288 // // The verb used before above stack frames, i.e. "<verb> at {". A
1289 // // mandatory string.
1290 // "verb": "Allocated",
1292 // // Are block lifetimes recorded? Affects whether some other fields are
1293 // // present. A mandatory boolean.
1294 // "bklt": true,
1296 // // Are block accesses recorded? Affects whether some other fields are
1297 // // present. A mandatory boolean.
1298 // "bkacc": true,
1300 // // Byte/bytes/blocks-position units. Optional strings. "byte", "bytes",
1301 // // and "blocks" are the values used if these fields are omitted.
1302 // "bu": "byte", "bsu": "bytes", "bksu": "blocks",
1304 // // Time units (individual and 1,000,000x). Mandatory strings.
// "tu": "instrs", "Mtu": "Minstr",
// // The "short-lived" time threshold, measured in "tu"s.
1308 // // - bklt=true: a mandatory integer.
1309 // // - bklt=false: omitted.
1310 // "tuth": 500,
1312 // // The executed command. A mandatory string.
1313 // "cmd": "date",
1315 // // The process ID. A mandatory integer.
// "pid": 61129,
1318 // // The time at the end of execution (t-end). A mandatory integer.
// "te": 350682,
1321 // // The time of the global max (t-gmax).
1322 // // - bklt=true: a mandatory integer.
1323 // // - bklt=false: omitted.
1324 // "tg": 331312,
1326 // // The program points. A mandatory array.
1327 // "pps": [
1328 // {
1329 // // Total bytes and blocks. Mandatory integers.
1330 // "tb": 5, "tbk": 1,
1332 // // Total lifetimes of all blocks allocated at this PP.
1333 // // - bklt=true: a mandatory integer.
1334 // // - bklt=false: omitted.
1335 // "tl": 274,
1337 // // The maximum bytes and blocks for this PP.
1338 // // - bklt=true: mandatory integers.
1339 // // - bklt=false: omitted.
1340 // "mb": 5, "mbk": 1,
1342 // // The bytes and blocks at t-gmax for this PP.
1343 // // - bklt=true: mandatory integers.
1344 // // - bklt=false: omitted.
1345 // "gb": 0, "gbk": 0,
1347 // // The bytes and blocks at t-end for this PP.
1348 // // - bklt=true: mandatory integers.
1349 // // - bklt=false: omitted.
1350 // "eb": 0, "ebk": 0,
1352 // // The reads and writes of blocks for this PP.
1353 // // - bkacc=true: mandatory integers.
1354 // // - bkacc=false: omitted.
1355 // "rb": 41, "wb": 5,
1357 // // The exact accesses of blocks for this PP. Only used when all
1358 // // allocations are the same size and sufficiently small. A negative
1359 // // element indicates run-length encoding of the following integer.
1360 // // E.g. `-3, 4` means "three 4s in a row".
1361 // // - bkacc=true: an optional array of integers.
1362 // // - bkacc=false: omitted.
1363 // "acc": [5, -3, 4, 2],
1365 // // Frames. Each element is an index into the "ftbl" array below.
1366 // // - All modes: A mandatory array of integers.
1367 // "fs": [1, 2, 3]
1368 // }
1369 // ],
1371 // // Frame table. A mandatory array of strings.
1372 // "ftbl": [
1373 // "[root]",
1374 // "0x4AA1D9F: _nl_normalize_codeset (l10nflist.c:332)",
1375 // "0x4A9B414: _nl_load_locale_from_archive (loadarchive.c:173)",
1376 // "0x4A9A2BE: _nl_find_locale (findlocale.c:153)"
1377 // ]
1378 // }
1380 static VgFile* fp;
1382 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
1384 // The frame table holds unique frames.
1385 static WordFM* frame_tbl = NULL;
1386 static UWord next_frame_n = 0;
1388 static Word frame_cmp(UWord a, UWord b)
1390 return VG_(strcmp)((const HChar*)a, (const HChar*)b);
1393 static HChar hex_digit_to_ascii_char(UChar d)
1395 d = d & 0xf;
1396 return (d < 10) ? ('0' + d) : ('a' + (d - 10));
// For JSON, we must escape double quote, backslash, and 0x00..0x1f.
//
// Returns the original string if no escaping was required. Returns a pointer
// to a static buffer if escaping was required. Therefore, the return value is
// only valid until the next call to this function.
static const HChar* json_escape(const HChar* s)
{
   // Persistent output buffer; grown on demand, never freed.
   static HChar* buf = NULL;
   static SizeT bufcap = 0;

   // Do we need any escaping?  First pass: count the extra bytes needed.
   // '"' and '\' each become a 2-char sequence (+1); a control char becomes
   // a 6-char "\u00xx" sequence (+5).
   SizeT extra = 0;
   const HChar* p = s;
   while (*p) {
      UChar c = *p;
      if (c == '"' || c == '\\') {
         extra += 1;
      } else if (c <= 0x1f) {
         extra += 5;
      }
      p++;
   }
   SizeT len = p - s;   // == strlen(s), computed by the loop above

   if (extra == 0) {
      // No escaping needed.
      return s;
   }

   // Escaping needed. (The +1 is for the NUL terminator.) Enlarge buf if
   // necessary.
   SizeT newcap = len + extra + 1;
   if (bufcap < newcap) {
      buf = VG_(realloc)("dh.json", buf, newcap);
      bufcap = newcap;
   }

   // Second pass: copy into 'buf', expanding exactly the escapes counted
   // in the first pass.
   p = s;
   HChar* q = buf;
   while (*p) {
      UChar c = *p;
      if (c == '"') {
         *q++ = '\\';
         *q++ = '"';
      } else if (c == '\\') {
         *q++ = '\\';
         *q++ = '\\';
      } else if (c <= 0x1f) {
         *q++ = '\\';
         *q++ = 'u';
         *q++ = '0';
         *q++ = '0';
         *q++ = hex_digit_to_ascii_char((c & 0x00f0) >> 4);
         *q++ = hex_digit_to_ascii_char(c & 0x000f);
      } else {
         *q++ = c;
      }
      p++;
   }
   *q = '\0';

   return buf;
}
// Callback for VG_(apply_ExeContext): emits one stack frame — plus any
// inlined frames at the same IP — as frame-table indices into the current
// "fs" JSON array.  'opaque' points to a Bool saying whether the opening
// '[' is still pending (comma-first style).
static void write_PPInfo_frame(UInt n, DiEpoch ep, Addr ip, void* opaque)
{
   Bool* is_first = (Bool*)opaque;
   InlIPCursor* iipc = VG_(new_IIPC)(ep, ip);

   do {
      const HChar* buf = VG_(describe_IP)(ep, ip, iipc);

      // Skip entries in vg_replace_malloc.c (e.g. `malloc`, `calloc`,
      // `realloc`, `operator new`) because they're boring and clog up the
      // output.
      // Nb: 'continue' in a do/while jumps to the loop condition, so this
      // still advances the inline-IP cursor via VG_(next_IIPC).
      if (VG_(strstr)(buf, "vg_replace_malloc.c")) {
         continue;
      }

      // If this description has been seen before, get its number. Otherwise,
      // give it a new number and put it in the table.
      UWord keyW = 0, valW = 0;
      UWord frame_n = 0;
      Bool found = VG_(lookupFM)(frame_tbl, &keyW, &valW, (UWord)buf);
      if (found) {
         //const HChar* str = (const HChar*)keyW;
         //tl_assert(0 == VG_(strcmp)(buf, str));
         frame_n = valW;
      } else {
         // `buf` is a static buffer, we must copy it.
         const HChar* str = VG_(strdup)("dh.frame_tbl.3", buf);
         frame_n = next_frame_n++;
         Bool present = VG_(addToFM)(frame_tbl, (UWord)str, frame_n);
         tl_assert(!present);
      }

      // Comma-first: '[' opens the array for the first frame, ',' thereafter.
      FP("%c%lu", *is_first ? '[' : ',', frame_n);
      *is_first = False;

   } while (VG_(next_IIPC)(iipc));

   VG_(delete_IIPC)(iipc);
}
// Print one program point ("pp") record as a JSON object, comma-first
// ('[' opens the array for the first element, ',' otherwise).  Fields
// beyond tb/tbk are written only in Heap mode; in the other modes they
// are asserted to be zero/unset.
static void write_PPInfo(PPInfo* ppi, Bool is_first)
{
   FP(" %c{\"tb\":%llu,\"tbk\":%llu\n",
      is_first ? '[' : ',',
      ppi->total_bytes, ppi->total_blocks);

   if (clo_mode == Heap) {
      // Totals can never be smaller than the recorded maxima.
      tl_assert(ppi->total_blocks >= ppi->max_blocks);
      tl_assert(ppi->total_bytes >= ppi->max_bytes);

      FP(" ,\"tl\":%llu\n",
         ppi->total_lifetimes_instrs);
      FP(" ,\"mb\":%llu,\"mbk\":%llu\n",
         ppi->max_bytes, ppi->max_blocks);
      FP(" ,\"gb\":%llu,\"gbk\":%llu\n",
         ppi->at_tgmax_bytes, ppi->at_tgmax_blocks);
      FP(" ,\"eb\":%llu,\"ebk\":%llu\n",
         ppi->curr_bytes, ppi->curr_blocks);
      FP(" ,\"rb\":%llu,\"wb\":%llu\n",
         ppi->reads_bytes, ppi->writes_bytes);

      // The exact-access histogram is only present when all blocks at this
      // PP had the same ("Exactly") size.
      if (ppi->histo && ppi->xsize_tag == Exactly) {
         FP(" ,\"acc\":[");

         // Simple run-length encoding: when N entries in a row have the same
         // value M, we print "-N,M". If there is just one in a row, we just
         // print "M". This reduces file size significantly.
         UShort repval = 0;
         Int reps = 0;
         for (UWord i = 0; i < ppi->xsize; i++) {
            UShort h = ppi->histo[i];
            if (repval == h) {
               // Continue current run.
               reps++;
            } else {
               // End of run; print it.  (reps == 0 only before the first
               // element, in which case nothing is printed.)
               if (reps == 1) {
                  FP("%u,", repval);
               } else if (reps > 1) {
                  FP("-%d,%u,", reps, repval);
               }
               reps = 1;
               repval = h;
            }
         }
         // Print the final run.
         if (reps == 1) {
            FP("%u", repval);
         } else if (reps > 1) {
            FP("-%d,%u", reps, repval);
         }

         FP("]\n");
      }
   } else {
      // Non-Heap modes never populate the lifetime/access fields.
      tl_assert(ppi->curr_bytes == 0);
      tl_assert(ppi->curr_blocks == 0);
      tl_assert(ppi->max_bytes == 0);
      tl_assert(ppi->max_blocks == 0);
      tl_assert(ppi->at_tgmax_bytes == 0);
      tl_assert(ppi->at_tgmax_blocks == 0);
      tl_assert(ppi->total_lifetimes_instrs == 0);
      tl_assert(ppi->freed_blocks == 0);
      tl_assert(ppi->reads_bytes == 0);
      tl_assert(ppi->writes_bytes == 0);
      tl_assert(ppi->xsize_tag == 0);
      tl_assert(ppi->xsize == 0);
      tl_assert(ppi->histo == NULL);
   }

   // The frame list for this PP; write_PPInfo_frame prints the opening '['.
   FP(" ,\"fs\":");
   Bool is_first_frame = True;
   VG_(apply_ExeContext)(write_PPInfo_frame, &is_first_frame, ppi->ec);
   FP("]\n");

   FP(" }\n");
}
// Print the "pps" JSON array: one object per program point, in the
// ppinfo WordFM's iteration order.
static void write_PPInfos(void)
{
   UWord keyW, valW;

   FP(",\"pps\":\n");

   VG_(initIterFM)(ppinfo);
   Bool is_first = True;
   while (VG_(nextIterFM)(ppinfo, &keyW, &valW)) {
      PPInfo* ppi = (PPInfo*)valW;
      // Each value's 'ec' must match the key it is filed under.
      tl_assert(ppi && ppi->ec == (ExeContext*)keyW);
      write_PPInfo(ppi, is_first);
      is_first = False;
   }
   VG_(doneIterFM)(ppinfo);

   if (is_first) {
      // We didn't print any elements. This happens if ppinfo is empty.
      // Emit the '[' that write_PPInfo would otherwise have printed
      // (comma-first style).
      FP(" [\n");
   }

   FP(" ]\n");
}
// Finalisation: harvest stats for still-live blocks, write the JSON output
// file, and print brief end-of-run statistics.
static void dh_fini(Int exit_status)
{
   // This function does lots of allocations that it doesn't bother to free,
   // because execution is almost over anyway.

   UWord keyW, valW;

   // Total bytes might be at a possible peak.
   if (clo_mode == Heap) {
      check_for_peak();

      // Before printing statistics, we must harvest various stats (such as
      // lifetimes and accesses) for all the blocks that are still alive.
      VG_(initIterFM)( interval_tree );
      while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
         Block* bk = (Block*)keyW;
         tl_assert(valW == 0);   // interval_tree stores no values, only keys
         tl_assert(bk);
         retire_Block(bk, False/*!because_freed*/);
      }
      VG_(doneIterFM)( interval_tree );

      // Stats.
      if (VG_(clo_stats)) {
         VG_(dmsg)(" dhat: find_Block_containing:\n");
         VG_(dmsg)(" dhat: found: %'lu\n",
                   stats__n_fBc_cached0 + stats__n_fBc_cached1
                                        + stats__n_fBc_cached2
                                        + stats__n_fBc_uncached);
         VG_(dmsg)(" dhat: at cache0 %'14lu at cache1 %'14lu\n",
                   stats__n_fBc_cached0,
                   stats__n_fBc_cached1);
         VG_(dmsg)(" dhat: at cache2 %'14lu uncached %'14lu\n",
                   stats__n_fBc_cached2,
                   stats__n_fBc_uncached);
         VG_(dmsg)(" dhat: notfound: %'lu\n", stats__n_fBc_notfound);
         VG_(dmsg)("\n");
      }
   }

   // Create the frame table, and insert the special "[root]" node at index 0.
   frame_tbl = VG_(newFM)(VG_(malloc),
                          "dh.frame_tbl.1",
                          VG_(free),
                          frame_cmp);
   const HChar* root = VG_(strdup)("dh.frame_tbl.2", "[root]");
   Bool present = VG_(addToFM)(frame_tbl, (UWord)root, 0);
   tl_assert(!present);
   next_frame_n = 1;

   // Setup output filename. Nb: it's important to do this now, i.e. as late
   // as possible. If we do it at start-up and the program forks and the
   // output file format string contains a %p (pid) specifier, both the parent
   // and child will incorrectly write to the same file; this happened in
   // 3.3.0.
   HChar* dhat_out_file =
      VG_(expand_file_name)("--dhat-out-file", clo_dhat_out_file);

   fp = VG_(fopen)(dhat_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
                   VKI_S_IRUSR|VKI_S_IWUSR);
   if (!fp) {
      VG_(umsg)("error: can't open DHAT output file '%s'\n", dhat_out_file);
      VG_(free)(dhat_out_file);
      return;
   }

   // Write to data file.
   FP("{\"dhatFileVersion\":2\n");

   // The output mode, block booleans, and byte/block units.
   if (clo_mode == Heap) {
      FP(",\"mode\":\"heap\",\"verb\":\"Allocated\"\n");
      FP(",\"bklt\":true,\"bkacc\":true\n");
   } else if (clo_mode == Copy) {
      FP(",\"mode\":\"copy\",\"verb\":\"Copied\"\n");
      FP(",\"bklt\":false,\"bkacc\":false\n");
   } else if (clo_mode == AdHoc) {
      FP(",\"mode\":\"ad-hoc\",\"verb\":\"Occurred\"\n");
      FP(",\"bklt\":false,\"bkacc\":false\n");
      FP(",\"bu\":\"unit\",\"bsu\":\"units\",\"bksu\":\"events\"\n");
   } else {
      tl_assert(False);   // unreachable: clo_mode is one of the three modes
   }

   // The time units.
   FP(",\"tu\":\"instrs\",\"Mtu\":\"Minstr\"\n");
   if (clo_mode == Heap) {
      FP(",\"tuth\":500\n");
   }

   // The command.
   const HChar* exe = VG_(args_the_exename);
   FP(",\"cmd\":\"%s", json_escape(exe));
   for (Word i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
      const HChar* arg = *(HChar**)VG_(indexXA)(VG_(args_for_client), i);
      FP(" %s", json_escape(arg));
   }
   FP("\"\n");

   // The PID.
   FP(",\"pid\":%d\n", VG_(getpid)());

   // Times.
   FP(",\"te\":%llu\n", g_curr_instrs);
   if (clo_mode == Heap) {
      FP(",\"tg\":%llu\n", g_tgmax_instrs);
   } else {
      tl_assert(g_tgmax_instrs == 0);
   }

   // APs.
   write_PPInfos();

   // Frame table.
   FP(",\"ftbl\":\n");

   // The frame table maps strings to numbers. We want to print it ordered by
   // numbers. So we create an array and fill it in from the frame table, then
   // print that.
   UWord n_frames = next_frame_n;
   const HChar** frames =
      VG_(malloc)("dh.frames", n_frames * sizeof(const HChar*));
   VG_(initIterFM)(frame_tbl);
   while (VG_(nextIterFM)(frame_tbl, &keyW, &valW)) {
      const HChar* str = (const HChar*)keyW;
      UWord n = valW;
      frames[n] = str;
   }
   VG_(doneIterFM)(frame_tbl);

   for (UWord i = 0; i < n_frames; i++) {
      FP(" %c\"%s\"\n", i == 0 ? '[' : ',', json_escape(frames[i]));
   }
   FP(" ]\n");
   VG_(free)(frames);

   FP("}\n");

   VG_(fclose)(fp);
   fp = NULL;

   if (VG_(clo_verbosity) == 0) {
      return;
   }

   // Print brief global stats.
   VG_(umsg)("Total: %'llu %s in %'llu %s\n",
             g_total_bytes, clo_mode == AdHoc ? "units" : "bytes",
             g_total_blocks, clo_mode == AdHoc ? "events" : "blocks");
   if (clo_mode == Heap) {
      VG_(umsg)("At t-gmax: %'llu bytes in %'llu blocks\n",
                g_max_bytes, g_max_blocks);
      VG_(umsg)("At t-end: %'llu bytes in %'llu blocks\n",
                g_curr_bytes, g_curr_blocks);
      VG_(umsg)("Reads: %'llu bytes\n", g_reads_bytes);
      VG_(umsg)("Writes: %'llu bytes\n", g_writes_bytes);
   } else {
      // Only Heap mode maintains these globals.
      tl_assert(g_max_bytes == 0);
      tl_assert(g_max_blocks == 0);
      tl_assert(g_curr_bytes == 0);
      tl_assert(g_curr_blocks == 0);
      tl_assert(g_reads_bytes == 0);
      tl_assert(g_writes_bytes == 0);
   }

   // Print a how-to-view-the-profile hint.
   VG_(umsg)("\n");
   VG_(umsg)("To view the resulting profile, open\n");
   VG_(umsg)("  file://%s/%s\n", DHAT_VIEW_DIR, "dh_view.html");
   VG_(umsg)("in a web browser, click on \"Load...\", "
             "and then select the file\n");
   VG_(umsg)("  %s\n", dhat_out_file);
   VG_(umsg)("The text at the bottom explains the abbreviations used in the "
             "output.\n");

   VG_(free)(dhat_out_file);
}
1783 //------------------------------------------------------------//
1784 //--- Initialisation ---//
1785 //------------------------------------------------------------//
1787 static void dh_post_clo_init(void)
1789 if (clo_mode == Heap) {
1790 VG_(track_pre_mem_read) ( dh_handle_noninsn_read );
1791 VG_(track_pre_mem_read_asciiz) ( dh_handle_noninsn_read_asciiz );
1792 VG_(track_post_mem_write) ( dh_handle_noninsn_write );
// Tool registration, run before command-line processing: sets the tool's
// details, registers the basic/needs callbacks and the full
// malloc-replacement suite, and creates the two global WordFMs.
static void dh_pre_clo_init(void)
{
   VG_(details_name)            ("DHAT");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a dynamic heap analysis tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2010-2018, and GNU GPL'd, by Mozilla Foundation");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 600 );

   // Basic functions.
   VG_(basic_tool_funcs)          (dh_post_clo_init,
                                   dh_instrument,
                                   dh_fini);

   // Needs.
   VG_(needs_libc_freeres)();
   VG_(needs_cxx_freeres)();
   VG_(needs_command_line_options)(dh_process_cmd_line_option,
                                   dh_print_usage,
                                   dh_print_debug_usage);
   VG_(needs_client_requests)     (dh_handle_client_request);
//   VG_(needs_sanity_checks)       (dh_cheap_sanity_check,
//                                   dh_expensive_sanity_check);
   VG_(needs_malloc_replacement)(dh_malloc,
                                 dh___builtin_new,
                                 dh___builtin_new_aligned,
                                 dh___builtin_vec_new,
                                 dh___builtin_vec_new_aligned,
                                 dh_memalign,
                                 dh_calloc,
                                 dh_free,
                                 dh___builtin_delete,
                                 dh___builtin_delete_aligned,
                                 dh___builtin_vec_delete,
                                 dh___builtin_vec_delete_aligned,
                                 dh_realloc,
                                 dh_malloc_usable_size,
                                 0 );

   // The global state must not have been created yet.
   tl_assert(!interval_tree);
   tl_assert(!fbc_cache0);
   tl_assert(!fbc_cache1);
   tl_assert(!fbc_cache2);

   interval_tree = VG_(newFM)( VG_(malloc),
                               "dh.interval_tree.1",
                               VG_(free),
                               interval_tree_Cmp );

   ppinfo = VG_(newFM)( VG_(malloc),
                        "dh.ppinfo.1",
                        VG_(free),
                        NULL/*unboxedcmp*/ );
}
1852 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
1854 //--------------------------------------------------------------------//
1855 //--- end dh_main.c ---//
1856 //--------------------------------------------------------------------//